From: notaz Date: Fri, 26 Jul 2013 22:23:56 +0000 (+0300) Subject: rework sh2 sync, again.. X-Git-Tag: v1.85~36 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=picodrive.git;a=commitdiff_plain;h=19886062f1a36f70b1f01d58f3fa1b79162defac rework sh2 sync, again.. also some new debug and poll code VF seems to be ok at least.. --- diff --git a/Makefile b/Makefile index 4df1162..72896e0 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ CFLAGS += -Iplatform/linux/ ifndef DEBUG CFLAGS += -O2 -DNDEBUG endif +#CFLAGS += -DEVT_LOG #CFLAGS += -DDRC_CMP #drc_debug = 4 #profile = 1 diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 92774d0..42de630 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -39,6 +39,15 @@ typedef struct SH2_ void *p_rom; unsigned int pdb_io_csum[2]; +#define SH2_STATE_RUN (1 << 0) // to prevent recursion +#define SH2_STATE_SLEEP (1 << 1) +#define SH2_STATE_CPOLL (1 << 2) // polling comm regs +#define SH2_STATE_VPOLL (1 << 3) // polling VDP + unsigned int state; + unsigned int poll_addr; + int poll_cycles; + int poll_cnt; + // interpreter stuff int icount; // cycles left in current timeslice unsigned int ea; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 52b73cc..339a852 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -12,6 +12,8 @@ struct Pico32x Pico32x; SH2 sh2s[2]; +#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_SLEEP) + static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) { if (sh2->pending_irl > sh2->pending_int_irq) { @@ -28,11 +30,15 @@ static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) } // if !nested_call, must sync CPUs before calling this -void p32x_update_irls(int nested_call) +void p32x_update_irls(SH2 *active_sh2) { int irqs, mlvl = 0, slvl = 0; + int m68k_cycles = 0; int mrun, srun; + if (active_sh2 != NULL) + m68k_cycles = sh2_cycles_done_m68k(active_sh2); + // msh2 irqs = (Pico32x.sh2irqs | Pico32x.sh2irqi[0]) & ((Pico32x.sh2irq_mask[0] << 3) | P32XI_VRES); while ((irqs >>= 1)) @@ -45,9 +51,14 @@ void p32x_update_irls(int nested_call) slvl++; slvl *= 2; - mrun = sh2_irl_irq(&msh2, mlvl, nested_call); - srun = sh2_irl_irq(&ssh2, slvl, nested_call); - p32x_poll_event(mrun | (srun << 1), 0); + mrun = sh2_irl_irq(&msh2, mlvl, active_sh2 != NULL); + if (mrun) + p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, m68k_cycles); + + srun = sh2_irl_irq(&ssh2, slvl, active_sh2 != NULL); + if (srun) + p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, m68k_cycles); + elprintf(EL_32X, "update_irls: m %d/%d, s %d/%d", mlvl, mrun, slvl, srun); } @@ -158,8 +169,9 @@ void PicoReset32x(void) { if (PicoAHW & PAHW_32X) { Pico32x.sh2irqs |= P32XI_VRES; - p32x_update_irls(0); - p32x_poll_event(3, 0); + p32x_update_irls(NULL); + p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, 0); + p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, 0); p32x_timers_recalc(); } } @@ -204,10 +216,19 @@ static void p32x_start_blank(void) } Pico32x.sh2irqs |= P32XI_VINT; - p32x_update_irls(0); - p32x_poll_event(3, 1); + p32x_update_irls(NULL); + p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); + p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); } +// compare cycles, handling overflows +// check if a > b +#define CYCLES_GT(a, b) \ + ((int)((a) - (b)) > 0) +// check if a >= b +#define CYCLES_GE(a, b) \ + ((int)((a) - (b)) >= 0) + /* events */ static void pwm_irq_event(unsigned int now) { @@ -215,13 +236,14 @@ static void pwm_irq_event(unsigned int now) p32x_pwm_schedule(now); Pico32x.sh2irqs |= P32XI_PWM; - p32x_update_irls(0); + p32x_update_irls(NULL); } static void fillend_event(unsigned int now) { Pico32x.vdp_regs[0x0a/2] &= ~P32XV_nFEN; - p32x_poll_event(3, 1); + p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, now); + p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, now); } typedef void (event_cb)(unsigned int now); @@ -233,18 +255,32 @@ static event_cb *event_cbs[] = { [P32X_EVENT_FILLEND] = fillend_event, }; -// schedule event at some time (in m68k clocks) -void p32x_event_schedule(enum p32x_event event, unsigned int now, int after) +// schedule event at some time 'after', in m68k clocks +void p32x_event_schedule(unsigned int now, enum p32x_event event, int after) { - unsigned int when = (now + after) | 1; + unsigned int when; + + when = (now + after) | 1; elprintf(EL_32X, "new event #%u %u->%u", event, now, when); event_times[event] = when; - if (event_time_next == 0 || (int)(event_time_next - now) > after) + if (event_time_next == 0 || CYCLES_GT(event_time_next, when)) event_time_next = when; } +void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after) +{ + unsigned int now = sh2_cycles_done_m68k(sh2); + int left_to_next; + + p32x_event_schedule(now, event, after); + + left_to_next = (event_time_next - now) * 3; + if (sh2_cycles_left(sh2) > left_to_next) + sh2_end_run(sh2, left_to_next); +} + static void run_events(unsigned int until) { int oldest, oldest_diff, time; @@ -283,13 +319,61 @@ static void run_events(unsigned int until) elprintf(EL_32X, "next event #%d at %u", oldest, event_time_next); } -// compare cycles, handling overflows -// check if a > b -#define CYCLES_GT(a, b) \ - ((int)((a) - (b)) > 0) -// check if a >= b -#define CYCLES_GE(a, b) \ - ((int)((a) - (b)) >= 0) +static inline void run_sh2(SH2 *sh2, int m68k_cycles) +{ + int cycles, done; + + pevt_log_sh2_o(sh2, EVT_RUN_START); + sh2->state |= SH2_STATE_RUN; + cycles = C_M68K_TO_SH2(*sh2, m68k_cycles); + elprintf(EL_32X, "%csh2 +run %u %d", + sh2->is_slave?'s':'m', sh2->m68krcycles_done, cycles); + + done = sh2_execute(sh2, cycles); + + sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, done); + sh2->state &= ~SH2_STATE_RUN; + pevt_log_sh2_o(sh2, EVT_RUN_END); + elprintf(EL_32X, "%csh2 -run %u %d", + sh2->is_slave?'s':'m', sh2->m68krcycles_done, done); +} + +// sync other sh2 to this one +// note: recursive call +void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) +{ + SH2 *osh2 = &sh2s[sh2->is_slave ^ 1]; + int left_to_event; + int m68k_cycles; + + if (osh2->state & SH2_STATE_RUN) + return; + + m68k_cycles = m68k_target - osh2->m68krcycles_done; + if (m68k_cycles < 200) + return; + + if (osh2->state & SH2_IDLE_STATES) { + osh2->m68krcycles_done = m68k_target; + return; + } + + elprintf(EL_32X, "%csh2 sync to %u %d", + osh2->is_slave?'s':'m', m68k_target, m68k_cycles); + + run_sh2(osh2, m68k_cycles); + + // there might be new event to schedule current sh2 to + if (event_time_next) { + left_to_event = event_time_next - m68k_target; + left_to_event *= 3; + if (sh2_cycles_left(sh2) > left_to_event) { + if (left_to_event < 1) + left_to_event = 1; + sh2_end_run(sh2, left_to_event); + } + } +} #define sync_sh2s_normal p32x_sync_sh2s //#define sync_sh2s_lockstep p32x_sync_sh2s @@ -298,7 +382,7 @@ static void run_events(unsigned int until) void sync_sh2s_normal(unsigned int m68k_target) { unsigned int now, target, timer_cycles; - int cycles, done; + int cycles; elprintf(EL_32X, "sh2 sync to %u", m68k_target); @@ -327,42 +411,50 @@ void sync_sh2s_normal(unsigned int m68k_target) target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done, m68k_target - now, Pico32x.emu_flags); - if (Pico32x.emu_flags & (P32XF_SSH2POLL|P32XF_SSH2VPOLL)) { - ssh2.m68krcycles_done = target; - } - else { + if (!(ssh2.state & SH2_IDLE_STATES)) { cycles = target - ssh2.m68krcycles_done; if (cycles > 0) { - done = sh2_execute(&ssh2, C_M68K_TO_SH2(ssh2, cycles)); - ssh2.m68krcycles_done += C_SH2_TO_M68K(ssh2, done); + run_sh2(&ssh2, cycles); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; } } - if (Pico32x.emu_flags & (P32XF_MSH2POLL|P32XF_MSH2VPOLL)) { - msh2.m68krcycles_done = target; - } - else { + if (!(msh2.state & SH2_IDLE_STATES)) { cycles = target - msh2.m68krcycles_done; if (cycles > 0) { - done = sh2_execute(&msh2, C_M68K_TO_SH2(msh2, cycles)); - msh2.m68krcycles_done += C_SH2_TO_M68K(msh2, done); + run_sh2(&msh2, cycles); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; } } - now = msh2.m68krcycles_done; - if (CYCLES_GT(now, ssh2.m68krcycles_done)) - now = ssh2.m68krcycles_done; + now = target; + if (!(msh2.state & SH2_IDLE_STATES)) { + if (CYCLES_GT(now, msh2.m68krcycles_done)) + now = msh2.m68krcycles_done; + } + if (!(ssh2.state & SH2_IDLE_STATES)) { + if (CYCLES_GT(now, ssh2.m68krcycles_done)) + now = ssh2.m68krcycles_done; + } } p32x_timers_do(now - timer_cycles); timer_cycles = now; } + + // advance idle CPUs + if (msh2.state & SH2_IDLE_STATES) { + if (CYCLES_GT(m68k_target, msh2.m68krcycles_done)) + msh2.m68krcycles_done = m68k_target; + } + if (ssh2.state & SH2_IDLE_STATES) { + if (CYCLES_GT(m68k_target, ssh2.m68krcycles_done)) + ssh2.m68krcycles_done = m68k_target; + } } #define STEP_68K 24 @@ -383,8 +475,8 @@ void sync_sh2s_lockstep(unsigned int m68k_target) #define CPUS_RUN(m68k_cycles,s68k_cycles) do { \ SekRunM68k(m68k_cycles); \ - if (Pico32x.emu_flags & P32XF_68KPOLL) \ - p32x_sync_sh2s(SekCycleCntT + SekCycleCnt); \ + if (Pico32x.emu_flags & (P32XF_68KCPOLL|P32XF_68KVPOLL)) \ + p32x_sync_sh2s(SekCyclesDoneT2()); \ } while (0) #define PICO_32X @@ -396,13 +488,15 @@ void PicoFrame32x(void) if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0) // no forced blanking Pico32x.vdp_regs[0x0a/2] &= ~P32XV_PEN; // no palette access - p32x_poll_event(3, 1); + p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); + p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); PicoFrameStart(); PicoFrameHints(); sh2_drc_frame(); - elprintf(EL_32X, "poll: %02x", Pico32x.emu_flags); + elprintf(EL_32X, "poll: %02x %02x %02x", + Pico32x.emu_flags & 3, msh2.state, ssh2.state); } // calculate multipliers against 68k clock (7670442) @@ -428,9 +522,9 @@ void Pico32xStateLoaded(int is_early) return; } + SekCycleCnt = 0; sh2s[0].m68krcycles_done = sh2s[1].m68krcycles_done = SekCycleCntT; - p32x_update_irls(0); - p32x_poll_event(3, 0); + p32x_update_irls(NULL); p32x_timers_recalc(); run_events(SekCycleCntT); } diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 2ec7563..db5d045 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -42,13 +42,6 @@ #include "../memory.h" #include "../../cpu/sh2/compiler.h" -#if 0 -#undef ash2_end_run -#undef SekEndRun -#define ash2_end_run(x) -#define SekEndRun(x) -#endif - static const char str_mars[] = "MARS"; void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; @@ -57,64 +50,96 @@ struct Pico32xMem *Pico32xMem; static void bank_switch(int b); // poll detection -#define POLL_THRESHOLD 6 +#define POLL_THRESHOLD 3 -struct poll_det { - u32 addr, cycles, cyc_max; - int cnt, flag; -}; -static struct poll_det m68k_poll, sh2_poll[2]; +static struct { + u32 addr, cycles; + int cnt; +} m68k_poll; -static int p32x_poll_detect(struct poll_det *pd, u32 a, u32 cycles, int is_vdp) +static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) { - int ret = 0, flag = pd->flag; - - if (is_vdp) - flag <<= 3; - - if (a - 2 <= pd->addr && pd->addr <= a + 2 && cycles - pd->cycles <= pd->cyc_max) { - pd->cnt++; - if (pd->cnt > POLL_THRESHOLD) { - if (!(Pico32x.emu_flags & flag)) { - elprintf(EL_32X, "%s poll addr %08x, cyc %u", - flag & (P32XF_68KPOLL|P32XF_68KVPOLL) ? "m68k" : - (flag & (P32XF_MSH2POLL|P32XF_MSH2VPOLL) ? "msh2" : "ssh2"), a, cycles - pd->cycles); + int ret = 0; + + if (a - 2 <= m68k_poll.addr && m68k_poll.addr <= a + 2 + && cycles - m68k_poll.cycles <= 64) + { + if (m68k_poll.cnt++ > POLL_THRESHOLD) { + if (!(Pico32x.emu_flags & flags)) { + elprintf(EL_32X, "m68k poll addr %08x, cyc %u", + a, cycles - m68k_poll.cycles); ret = 1; } - Pico32x.emu_flags |= flag; + Pico32x.emu_flags |= flags; } } else { - pd->cnt = 0; - pd->addr = a; + m68k_poll.cnt = 0; + m68k_poll.addr = a; } - pd->cycles = cycles; + m68k_poll.cycles = cycles; return ret; } -static int p32x_poll_undetect(struct poll_det *pd, int is_vdp) +void p32x_m68k_poll_event(u32 flags) +{ + if (Pico32x.emu_flags & flags) { + elprintf(EL_32X, "m68k poll %02x -> %02x", Pico32x.emu_flags, + Pico32x.emu_flags & ~flags); + Pico32x.emu_flags &= ~flags; + SekSetStop(0); + } + m68k_poll.addr = m68k_poll.cnt = 0; +} + +static void sh2_poll_detect(SH2 *sh2, u32 a, u32 flags) { - int ret = 0, flag = pd->flag; - if (is_vdp) - flag <<= 3; // VDP only + int cycles_left = sh2_cycles_left(sh2); + + if (a == sh2->poll_addr && sh2->poll_cycles - cycles_left <= 10) { + if (sh2->poll_cnt++ > 3) { + if (!(sh2->state & flags)) + elprintf(EL_32X, "%csh2 state: %02x->%02x", sh2->is_slave?'s':'m', + sh2->state, sh2->state | flags); + + sh2->state |= flags; + sh2_end_run(sh2, 1); + pevt_log_sh2(sh2, EVT_POLL_START); + return; + } + } else - flag |= flag << 3; // both - if (Pico32x.emu_flags & flag) { - elprintf(EL_32X, "poll %02x -> %02x", Pico32x.emu_flags, Pico32x.emu_flags & ~flag); - ret = 1; + sh2->poll_cnt = 0; + sh2->poll_addr = a; + sh2->poll_cycles = cycles_left; +} + +void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) +{ + if (sh2->state & flags) { + elprintf(EL_32X, "%csh2 state: %02x->%02x", sh2->is_slave?'s':'m', + sh2->state, sh2->state & ~flags); + + if (sh2->m68krcycles_done < m68k_cycles) + sh2->m68krcycles_done = m68k_cycles; + + pevt_log_sh2_o(sh2, EVT_POLL_END); } - Pico32x.emu_flags &= ~flag; - pd->addr = pd->cnt = 0; - return ret; + + sh2->state &= ~flags; + sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; } -void p32x_poll_event(int cpu_mask, int is_vdp) +static void sh2s_sync_on_read(SH2 *sh2) { - if (cpu_mask & 1) - p32x_poll_undetect(&sh2_poll[0], is_vdp); - if (cpu_mask & 2) - p32x_poll_undetect(&sh2_poll[1], is_vdp); + int cycles; + if (sh2->poll_cnt != 0) + return; + + cycles = sh2_cycles_done(sh2); + if (cycles > 600) + p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + cycles / 3); } // SH2 faking @@ -165,8 +190,7 @@ static void dma_68k2sh2_do(void) elprintf(EL_32X|EL_ANOMALY, "tcr0 and dreq len differ: %d != %d", dmac0->tcr0, *dreqlen); // HACK: assume bus is busy and SH2 is halted - // XXX: use different mechanism for this, not poll det - Pico32x.emu_flags |= P32XF_MSH2POLL; // id ? P32XF_SSH2POLL : P32XF_MSH2POLL; + msh2.state |= SH2_STATE_SLEEP; for (i = 0; i < Pico32x.dmac_ptr && dmac0->tcr0 > 0; i++) { elprintf(EL_32X, "dmaw [%08x] %04x, left %d", dmac0->dar0, Pico32x.dmac_fifo[i], *dreqlen); @@ -182,7 +206,7 @@ static void dma_68k2sh2_do(void) Pico32x.regs[6 / 2] &= ~P32XS_68S; // transfer complete if (dmac0->tcr0 == 0) { dmac0->chcr0 |= 2; // DMA has ended normally - p32x_poll_undetect(&sh2_poll[0], 0); + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_SLEEP, SekCyclesDoneT()); } } @@ -211,7 +235,7 @@ static u32 p32x_reg_read16(u32 a) p32x_sync_sh2s(cycles); if (Pico32x.comm_dirty_sh2 & comreg) Pico32x.comm_dirty_sh2 &= ~comreg; - else if (p32x_poll_detect(&m68k_poll, a, cycles, 0)) { + else if (m68k_poll_detect(a, cycles, P32XF_68KCPOLL)) { SekSetStop(1); SekEndTimeslice(16); } @@ -255,12 +279,12 @@ static void p32x_reg_write8(u32 a, u32 d) if ((d & 1) && !(Pico32x.sh2irqi[0] & P32XI_CMD)) { p32x_sync_sh2s(SekCyclesDoneT()); Pico32x.sh2irqi[0] |= P32XI_CMD; - p32x_update_irls(0); + p32x_update_irls(NULL); } if ((d & 2) && !(Pico32x.sh2irqi[1] & P32XI_CMD)) { p32x_sync_sh2s(SekCyclesDoneT()); Pico32x.sh2irqi[1] |= P32XI_CMD; - p32x_update_irls(0); + p32x_update_irls(NULL); } return; case 5: // bank @@ -285,14 +309,14 @@ static void p32x_reg_write8(u32 a, u32 d) if (r8[a ^ 1] == d) return; - + comreg = 1 << (a & 0x0f) / 2; if (Pico32x.comm_dirty_68k & comreg) p32x_sync_sh2s(cycles); r8[a ^ 1] = d; - p32x_poll_undetect(&sh2_poll[0], 0); - p32x_poll_undetect(&sh2_poll[1], 0); + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); Pico32x.comm_dirty_68k |= comreg; if (cycles - (int)msh2.m68krcycles_done > 120) @@ -351,8 +375,8 @@ static void p32x_reg_write16(u32 a, u32 d) p32x_sync_sh2s(cycles); r[a / 2] = d; - p32x_poll_undetect(&sh2_poll[0], 0); - p32x_poll_undetect(&sh2_poll[1], 0); + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); Pico32x.comm_dirty_68k |= comreg; if (cycles - (int)msh2.m68krcycles_done > 120) @@ -382,9 +406,6 @@ static void p32x_vdp_write8(u32 a, u32 d) u16 *r = Pico32x.vdp_regs; a &= 0x0f; - // for FEN checks between writes - sh2_poll[0].cnt = 0; - // TODO: verify what's writeable switch (a) { case 0x01: @@ -412,7 +433,7 @@ static void p32x_vdp_write8(u32 a, u32 d) } } -static void p32x_vdp_write16(u32 a, u32 d, u32 cycles) +static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2) { a &= 0x0e; if (a == 6) { // fill start @@ -430,9 +451,10 @@ static void p32x_vdp_write16(u32 a, u32 d, u32 cycles) } Pico32x.vdp_regs[0x06 / 2] = a; Pico32x.vdp_regs[0x08 / 2] = d; - if (cycles > 0) { + if (sh2 != NULL && len > 4) { Pico32x.vdp_regs[0x0a / 2] |= P32XV_nFEN; - p32x_event_schedule(P32X_EVENT_FILLEND, cycles, len); + // supposedly takes 3 bus/6 sh2 cycles? or 3 sh2 cycles? + p32x_event_schedule_sh2(sh2, P32X_EVENT_FILLEND, 3 + len); } return; } @@ -452,8 +474,8 @@ static u32 p32x_sh2reg_read16(u32 a, int cpuid) case 0x00: // adapter/irq ctl return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[cpuid]; case 0x04: // H count (often as comm too) - if (p32x_poll_detect(&sh2_poll[cpuid], a, ash2_cycles_done(&sh2s[cpuid]), 0)) - ash2_end_run(&sh2s[cpuid], 8); + sh2_poll_detect(&sh2s[cpuid], a, SH2_STATE_CPOLL); + sh2s_sync_on_read(&sh2s[cpuid]); return Pico32x.sh2_regs[4 / 2]; case 0x10: // DREQ len return r[a / 2]; @@ -467,12 +489,12 @@ static u32 p32x_sh2reg_read16(u32 a, int cpuid) int comreg = 1 << (a & 0x0f) / 2; if (Pico32x.comm_dirty_68k & comreg) Pico32x.comm_dirty_68k &= ~comreg; - else if (p32x_poll_detect(&sh2_poll[cpuid], a, ash2_cycles_done(&sh2s[cpuid]), 0)) - ash2_end_run(&sh2s[cpuid], 8); + else + sh2_poll_detect(&sh2s[cpuid], a, SH2_STATE_CPOLL); + sh2s_sync_on_read(&sh2s[cpuid]); return r[a / 2]; } if ((a & 0x30) == 0x30) { - sh2_poll[cpuid].cnt = 0; return p32x_pwm_read16(a); } @@ -482,22 +504,32 @@ static u32 p32x_sh2reg_read16(u32 a, int cpuid) static void p32x_sh2reg_write8(u32 a, u32 d, int cpuid) { a &= 0xff; + + sh2s[cpuid].poll_addr = 0; + switch (a) { case 0: // FM Pico32x.regs[0] &= ~P32XS_FM; Pico32x.regs[0] |= (d << 8) & P32XS_FM; return; - case 1: // + case 1: // HEN/irq masks + if ((d ^ Pico32x.sh2_regs[0]) & 0x80) + elprintf(EL_ANOMALY|EL_32X, "HEN"); Pico32x.sh2irq_mask[cpuid] = d & 0x8f; Pico32x.sh2_regs[0] &= ~0x80; Pico32x.sh2_regs[0] |= d & 0x80; if (d & 1) - p32x_pwm_schedule(sh2s[cpuid].m68krcycles_done); // XXX: timing? - p32x_update_irls(1); + p32x_pwm_schedule_sh2(&sh2s[cpuid]); + p32x_update_irls(&sh2s[cpuid]); return; case 5: // H count - Pico32x.sh2_regs[4 / 2] = d & 0xff; - p32x_poll_undetect(&sh2_poll[cpuid ^ 1], 0); + d &= 0xff; + if (Pico32x.sh2_regs[4 / 2] != d) { + Pico32x.sh2_regs[4 / 2] = d; + p32x_sh2_poll_event(&sh2s[cpuid ^ 1], SH2_STATE_CPOLL, + sh2_cycles_done_m68k(&sh2s[cpuid])); + sh2_end_run(&sh2s[cpuid], 4); + } return; } @@ -508,9 +540,9 @@ static void p32x_sh2reg_write8(u32 a, u32 d, int cpuid) return; r8[a ^ 1] = d; - if (p32x_poll_undetect(&m68k_poll, 0)) - SekSetStop(0); - p32x_poll_undetect(&sh2_poll[cpuid ^ 1], 0); + p32x_m68k_poll_event(P32XF_68KCPOLL); + p32x_sh2_poll_event(&sh2s[cpuid ^ 1], SH2_STATE_CPOLL, + sh2_cycles_done_m68k(&sh2s[cpuid])); comreg = 1 << (a & 0x0f) / 2; Pico32x.comm_dirty_sh2 |= comreg; return; @@ -521,6 +553,8 @@ static void p32x_sh2reg_write16(u32 a, u32 d, int cpuid) { a &= 0xfe; + sh2s[cpuid].poll_addr = 0; + // comm if ((a & 0x30) == 0x20) { int comreg; @@ -528,9 +562,9 @@ static void p32x_sh2reg_write16(u32 a, u32 d, int cpuid) return; Pico32x.regs[a / 2] = d; - if (p32x_poll_undetect(&m68k_poll, 0)) - SekSetStop(0); - p32x_poll_undetect(&sh2_poll[cpuid ^ 1], 0); + p32x_m68k_poll_event(P32XF_68KCPOLL); + p32x_sh2_poll_event(&sh2s[cpuid ^ 1], SH2_STATE_CPOLL, + sh2_cycles_done_m68k(&sh2s[cpuid])); comreg = 1 << (a & 0x0f) / 2; Pico32x.comm_dirty_sh2 |= comreg; return; @@ -553,7 +587,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, int cpuid) case 0x1c: Pico32x.sh2irqs &= ~P32XI_PWM; if (!(Pico32x.emu_flags & P32XF_PWM_PEND)) - p32x_pwm_schedule(sh2s[cpuid].m68krcycles_done); // timing? + p32x_pwm_schedule_sh2(&sh2s[cpuid]); goto irls; } @@ -561,7 +595,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, int cpuid) return; irls: - p32x_update_irls(1); + p32x_update_irls(&sh2s[cpuid]); } // ------------------------------------------------------------------ @@ -693,7 +727,7 @@ static void sh2_peripheral_write32(u32 a, u32 d, int id) dmac0->tcr0 &= 0xffffff; // HACK: assume 68k starts writing soon and end the timeslice - ash2_end_run(&sh2s[id], 16); + sh2_end_run(&sh2s[id], 16); // DREQ is only sent after first 4 words are written. // we do multiple of 4 words to avoid messing up alignment @@ -829,7 +863,7 @@ static void PicoWrite16_32x_on(u32 a, u32 d) } if ((a & 0xfff0) == 0x5180) { // a15180 - p32x_vdp_write16(a, d, 0); // FIXME? + p32x_vdp_write16(a, d, NULL); // FIXME? return; } @@ -1014,8 +1048,7 @@ static u32 sh2_read8_cs0(u32 a, int id) if ((a & 0x3ff00) == 0x4100) { d = p32x_vdp_read16(a); - if (p32x_poll_detect(&sh2_poll[id], a, ash2_cycles_done(&sh2s[id]), 1)) - ash2_end_run(&sh2s[id], 8); + sh2_poll_detect(&sh2s[id], a, SH2_STATE_VPOLL); goto out_16to8; } @@ -1069,8 +1102,7 @@ static u32 sh2_read16_cs0(u32 a, int id) if ((a & 0x3ff00) == 0x4100) { d = p32x_vdp_read16(a); - if (p32x_poll_detect(&sh2_poll[id], a, ash2_cycles_done(&sh2s[id]), 1)) - ash2_end_run(&sh2s[id], 8); + sh2_poll_detect(&sh2s[id], a, SH2_STATE_VPOLL); goto out; } @@ -1116,6 +1148,7 @@ static int REGPARM(3) sh2_write8_cs0(u32 a, u32 d, int id) id ? 's' : 'm', a, d & 0xff, sh2_pc(id)); if ((a & 0x3ff00) == 0x4100) { + sh2s[id].poll_addr = 0; p32x_vdp_write8(a, d); return 0; } @@ -1185,8 +1218,8 @@ static int REGPARM(3) sh2_write16_cs0(u32 a, u32 d, int id) id ? 's' : 'm', a, d & 0xffff, sh2_pc(id)); if ((a & 0x3ff00) == 0x4100) { - sh2_poll[id].cnt = 0; // for poll before VDP accesses - p32x_vdp_write16(a, d, sh2s[id].m68krcycles_done); + sh2s[id].poll_addr = 0; + p32x_vdp_write16(a, d, &sh2s[id]); return 0; } @@ -1596,14 +1629,6 @@ void PicoMemSetup32x(void) msh2.write8_tab = ssh2.write8_tab = (const void **)(void *)sh2_write8_map; msh2.write16_tab = ssh2.write16_tab = (const void **)(void *)sh2_write16_map; - // setup poll detector - m68k_poll.flag = P32XF_68KPOLL; - m68k_poll.cyc_max = 64; - sh2_poll[0].flag = P32XF_MSH2POLL; - sh2_poll[0].cyc_max = 21; - sh2_poll[1].flag = P32XF_SSH2POLL; - sh2_poll[1].cyc_max = 16; - sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); } @@ -1615,6 +1640,13 @@ void Pico32xMemStateLoaded(void) memset(Pico32xMem->pwm, 0, sizeof(Pico32xMem->pwm)); Pico32x.dirty_pal = 1; + Pico32x.emu_flags &= ~(P32XF_68KCPOLL | P32XF_68KVPOLL); + memset(&m68k_poll, 0, sizeof(m68k_poll)); + msh2.state = 0; + msh2.poll_addr = msh2.poll_cycles = msh2.poll_cnt = 0; + ssh2.state = 0; + ssh2.poll_addr = ssh2.poll_cycles = ssh2.poll_cnt = 0; + sh2_drc_flush_all(); } diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index e3385aa..36261d7 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -86,21 +86,35 @@ void p32x_timers_do(unsigned int cycles) } } -void p32x_pwm_schedule(unsigned int now) +static int p32x_pwm_schedule_(void) { int tm; if (Pico32x.emu_flags & P32XF_PWM_PEND) - return; // already scheduled + return 0; // already scheduled if (Pico32x.sh2irqs & P32XI_PWM) - return; // previous not acked + return 0; // previous not acked if (!((Pico32x.sh2irq_mask[0] | Pico32x.sh2irq_mask[1]) & 1)) - return; // masked by everyone + return 0; // masked by everyone + Pico32x.emu_flags |= P32XF_PWM_PEND; tm = (Pico32x.regs[0x30 / 2] & 0x0f00) >> 8; tm = ((tm - 1) & 0x0f) + 1; - p32x_event_schedule(P32X_EVENT_PWM, now, pwm_cycles * tm / 3); - Pico32x.emu_flags |= P32XF_PWM_PEND; + return pwm_cycles * tm / 3; +} + +void p32x_pwm_schedule(unsigned int now) +{ + int after = p32x_pwm_schedule_(); + if (after != 0) + p32x_event_schedule(now, P32X_EVENT_PWM, after); +} + +void p32x_pwm_schedule_sh2(SH2 *sh2) +{ + int after = p32x_pwm_schedule_(); + if (after != 0) + p32x_event_schedule_sh2(sh2, P32X_EVENT_PWM, after); } unsigned int p32x_pwm_read16(unsigned int a) diff --git a/pico/debug.c b/pico/debug.c index 2370bfa..e579296 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -402,3 +402,137 @@ void PDebugCPUStep(void) SekStepM68k(); } +#ifdef EVT_LOG +static struct evt_t { + unsigned int cycles; + short cpu; + short evt; +} *evts; +static int first_frame; +static int evt_alloc; +static int evt_cnt; + +void pevt_log(unsigned int cycles, enum evt_cpu c, enum evt e) +{ + if (first_frame == 0) + first_frame = Pico.m.frame_count; + if (evt_alloc == evt_cnt) { + evt_alloc = evt_alloc * 2 + 16 * 1024; + evts = realloc(evts, evt_alloc * sizeof(evts[0])); + } + evts[evt_cnt].cycles = cycles; + evts[evt_cnt].cpu = c; + evts[evt_cnt].evt = e; + evt_cnt++; +} + +static int evt_cmp(const void *p1, const void *p2) +{ + const struct evt_t *e1 = p1, *e2 = p2; + int ret = (int)(e1->cycles - e2->cycles); + if (ret) + return ret; + if (e1->evt == EVT_RUN_END || e1->evt == EVT_POLL_END) + return -1; + if (e1->evt == EVT_RUN_START || e1->evt == EVT_POLL_START) + return 1; + if (e2->evt == EVT_RUN_END || e2->evt == EVT_POLL_END) + return 1; + if (e1->evt == EVT_RUN_START || e1->evt == EVT_POLL_START) + return -1; + return 0; +} + +void pevt_dump(void) +{ + static const char *evt_names[EVT_CNT] = { + "x", "x", "+run", "-run", "+poll", "-poll", + }; + char evt_print[EVT_CPU_CNT][EVT_CNT] = {{0,}}; + unsigned int start_cycles[EVT_CPU_CNT] = {0,}; + unsigned int run_cycles[EVT_CPU_CNT] = {0,}; + unsigned int frame_cycles[EVT_CPU_CNT] = {0,}; + unsigned int frame_resched[EVT_CPU_CNT] = {0,}; + unsigned int cycles = 0; + int frame = first_frame - 1; + int line = 0; + int cpu_mask = 0; + int dirty = 0; + int i; + + qsort(evts, evt_cnt, sizeof(evts[0]), evt_cmp); + + for (i = 0; i < evt_cnt; i++) { + int c = evts[i].cpu, e = evts[i].evt; + int ei, ci; + + if (cycles != evts[i].cycles || (cpu_mask & (1 << c)) + || e == EVT_FRAME_START || e == EVT_NEXT_LINE) + { + if (dirty) { + printf("%u:%03u:%u ", frame, line, cycles); + for (ci = 0; ci < EVT_CPU_CNT; ci++) { + int found = 0; + for (ei = 0; ei < EVT_CNT; ei++) { + if (evt_print[ci][ei]) { + if (ei == EVT_RUN_END) { + printf("%8s%4d", evt_names[ei], run_cycles[ci]); + run_cycles[ci] = 0; + } + else + printf("%8s ", evt_names[ei]); + found = 1; + } + } + if (!found) + printf("%12s", ""); + } + printf("\n"); + memset(evt_print, 0, sizeof(evt_print)); + cpu_mask = 0; + dirty = 0; + } + cycles = evts[i].cycles; + } + + switch (e) { + case EVT_FRAME_START: + frame++; + line = 0; + printf("%u:%03u:%u ", frame, line, cycles); + for (ci = 0; ci < EVT_CPU_CNT; ci++) { + printf("%12u", frame_cycles[ci]); + frame_cycles[ci] = 0; + } + printf("\n"); + printf("%u:%03u:%u ", frame, line, cycles); + for (ci = 0; ci < EVT_CPU_CNT; ci++) { + printf("%12u", frame_resched[ci]); + frame_resched[ci] = 0; + } + printf("\n"); + break; + case EVT_NEXT_LINE: + line++; + printf("%u:%03u:%u\n", frame, line, cycles); + break; + case EVT_RUN_START: + start_cycles[c] = cycles; + goto default_; + case EVT_RUN_END: + run_cycles[c] += cycles - start_cycles[c]; + frame_cycles[c] += cycles - start_cycles[c]; + frame_resched[c]++; + goto default_; + default_: + default: + evt_print[c][e] = 1; + cpu_mask |= 1 << c; + dirty = 1; + break; + } + } +} +#endif + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/pico.c b/pico/pico.c index 9a1faca..80ffb90 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -54,6 +54,7 @@ void PicoExit(void) if (SRam.data) free(SRam.data); + pevt_dump(); } void PicoPower(void) diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 12f649f..9099c85 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -30,9 +30,12 @@ static __inline void SekRunM68k(int cyc) { int cyc_do; pprof_start(m68k); + pevt_log_m68k_o(EVT_RUN_START); SekCycleAim+=cyc; - if ((cyc_do=SekCycleAim-SekCycleCnt) <= 0) return; + if ((cyc_do=SekCycleAim-SekCycleCnt) <= 0) + goto out; + #if defined(EMU_CORE_DEBUG) // this means we do run-compare SekCycleCnt+=CM_compareRun(cyc_do, 0); @@ -46,6 +49,8 @@ static __inline void SekRunM68k(int cyc) SekCycleCnt+=fm68k_emulate(cyc_do+1, 0, 0); #endif +out: + pevt_log_m68k_o(EVT_RUN_END); pprof_end(m68k); } @@ -55,6 +60,7 @@ static int PicoFrameHints(void) int lines, y, lines_vis = 224, line_sample, skip, vcnt_wrap; int hint; // Hint counter + pevt_log_m68k_o(EVT_FRAME_START); pv->v_counter = Pico.m.scanline = 0; if ((PicoOpt&POPT_ALT_RENDERER) && !PicoSkipFrame && (pv->reg[1]&0x40)) { // fast rend., display enabled @@ -144,7 +150,7 @@ static int PicoFrameHints(void) if (ym2612.dacen && PsndDacLine <= y) PsndDoDAC(y); #ifdef PICO_32X - p32x_sync_sh2s(SekCycleCntT + SekCycleCnt); + p32x_sync_sh2s(SekCyclesDoneT2()); #endif PsndGetSamples(y); } @@ -158,6 +164,7 @@ static int PicoFrameHints(void) #else if (PicoLineHook) PicoLineHook(); #endif + pevt_log_m68k_o(EVT_NEXT_LINE); } if (!skip) @@ -213,7 +220,7 @@ static int PicoFrameHints(void) } #ifdef PICO_32X - p32x_sync_sh2s(SekCycleCntT + SekCycleCnt); + p32x_sync_sh2s(SekCyclesDoneT2()); p32x_start_blank(); #endif @@ -235,6 +242,7 @@ static int PicoFrameHints(void) #else if (PicoLineHook) PicoLineHook(); #endif + pevt_log_m68k_o(EVT_NEXT_LINE); lines = scanlines_total; vcnt_wrap = Pico.m.pal ? 0x103 : 0xEB; // based on Gens, TODO: verify @@ -262,6 +270,7 @@ static int PicoFrameHints(void) #else if (PicoLineHook) PicoLineHook(); #endif + pevt_log_m68k_o(EVT_NEXT_LINE); } // sync z80 @@ -271,7 +280,7 @@ static int PicoFrameHints(void) PsndDoDAC(lines-1); #ifdef PICO_32X - p32x_sync_sh2s(SekCycleCntT + SekCycleCnt); + p32x_sync_sh2s(SekCyclesDoneT2()); #endif timers_cycle(); diff --git a/pico/pico_int.h b/pico/pico_int.h index a654c5a..bfdf73a 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -146,6 +146,7 @@ extern unsigned int SekCycleCntT; // total cycle counter, updated once per frame #define SekCyclesBurn(c) SekCycleCnt+=c #define SekCyclesDone() (SekCycleAim-SekCyclesLeft) // number of cycles done in this frame (can be checked anywhere) #define SekCyclesDoneT() (SekCycleCntT+SekCyclesDone()) // total nuber of cycles done for this rom +#define SekCyclesDoneT2() (SekCycleCntT + SekCycleCnt) // same as above but not from memhandlers #define SekEndRun(after) { \ SekCycleCnt -= SekCyclesLeft - (after); \ @@ -237,27 +238,31 @@ extern SH2 sh2s[2]; #define ssh2 sh2s[1] #ifndef DRC_SH2 -# define ash2_end_run(sh2, after) do { \ - if ((sh2)->icount > (after)) { \ +# define sh2_end_run(sh2, after_) do { \ + if ((sh2)->icount > (after_)) { \ (sh2)->cycles_timeslice -= (sh2)->icount; \ - (sh2)->icount = after; \ + (sh2)->icount = after_; \ } \ } while (0) -# define ash2_cycles_done(sh2) ((sh2)->cycles_timeslice - (sh2)->icount) +# define sh2_cycles_left(sh2) (sh2)->icount # define sh2_pc(c) (c) ? ssh2.ppc : msh2.ppc #else -# define ash2_end_run(sh2, after) do { \ - int left = (sh2)->sr >> 12; \ - if (left > (after)) { \ - (sh2)->cycles_timeslice -= left; \ +# define sh2_end_run(sh2, after_) do { \ + int left_ = (signed int)(sh2)->sr >> 12; \ + if (left_ > (after_)) { \ + (sh2)->cycles_timeslice -= left_; \ (sh2)->sr &= 0xfff; \ - (sh2)->sr |= (after) << 12; \ + (sh2)->sr |= (after_) << 12; \ } \ } while (0) -# define ash2_cycles_done(sh2) ((sh2)->cycles_timeslice - ((sh2)->sr >> 12)) +# define sh2_cycles_left(sh2) ((signed int)(sh2)->sr >> 12) # define sh2_pc(c) (c) ? ssh2.pc : msh2.pc #endif +#define sh2_cycles_done(sh2) ((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) +#define sh2_cycles_done_m68k(sh2) \ + ((sh2)->m68krcycles_done + (sh2_cycles_done(sh2) / 3)) + #define sh2_reg(c, x) (c) ? ssh2.r[x] : msh2.r[x] #define sh2_gbr(c) (c) ? ssh2.gbr : msh2.gbr #define sh2_vbr(c) (c) ? ssh2.vbr : msh2.vbr @@ -463,12 +468,8 @@ typedef struct #define P32XP_FULL (1<<15) // PWM #define P32XP_EMPTY (1<<14) -#define P32XF_68KPOLL (1 << 0) -#define P32XF_MSH2POLL (1 << 1) -#define P32XF_SSH2POLL (1 << 2) -#define P32XF_68KVPOLL (1 << 3) -#define P32XF_MSH2VPOLL (1 << 4) -#define P32XF_SSH2VPOLL (1 << 5) +#define P32XF_68KCPOLL (1 << 0) +#define P32XF_68KVPOLL (1 << 1) #define P32XF_PWM_PEND (1 << 6) #define P32XI_VRES (1 << 14/2) // IRL/2 @@ -737,9 +738,11 @@ void PicoUnload32x(void); void PicoFrame32x(void); void Pico32xStateLoaded(int is_early); void p32x_sync_sh2s(unsigned int m68k_target); -void p32x_update_irls(int nested_call); +void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target); +void p32x_update_irls(SH2 *active_sh2); void p32x_reset_sh2s(void); -void p32x_event_schedule(enum p32x_event event, unsigned int now, int after); +void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); +void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); // 32x/memory.c struct Pico32xMem *Pico32xMem; @@ -750,7 +753,8 @@ void PicoWrite16_32x(unsigned int a, unsigned int d); void PicoMemSetup32x(void); void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); -void p32x_poll_event(int cpu_mask, int is_vdp); +void p32x_m68k_poll_event(unsigned int flags); +void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); // 32x/draw.c void FinalizeLine32xRGB555(int sh, int line); @@ -772,6 +776,7 @@ void p32x_pwm_update(int *buf32, int length, int stereo); void p32x_timers_do(unsigned int cycles); void p32x_timers_recalc(void); void p32x_pwm_schedule(unsigned int now); +void p32x_pwm_schedule_sh2(SH2 *sh2); #else #define Pico32xInit() #define PicoPower32x() @@ -848,6 +853,45 @@ do { \ #define pprof_end_sub(...) #endif +#ifdef EVT_LOG +enum evt { + EVT_FRAME_START, + EVT_NEXT_LINE, + EVT_RUN_START, + EVT_RUN_END, + EVT_POLL_START, + EVT_POLL_END, + EVT_CNT +}; + +enum evt_cpu { + EVT_M68K, + EVT_S68K, + EVT_MSH2, + EVT_SSH2, + EVT_CPU_CNT +}; + +void pevt_log(unsigned int cycles, enum evt_cpu c, enum evt e); +void pevt_dump(void); + +#define pevt_log_m68k(e) \ + pevt_log(SekCyclesDoneT(), EVT_M68K, e) +#define pevt_log_m68k_o(e) \ + pevt_log(SekCyclesDoneT2(), EVT_M68K, e) +#define pevt_log_sh2(sh2, e) \ + pevt_log(sh2_cycles_done_m68k(sh2), EVT_MSH2 + (sh2)->is_slave, e) +#define pevt_log_sh2_o(sh2, e) \ + pevt_log((sh2)->m68krcycles_done, EVT_MSH2 + (sh2)->is_slave, e) +#else +#define pevt_log(c, e) +#define pevt_log_m68k(e) +#define pevt_log_m68k_o(e) +#define pevt_log_sh2(sh2, e) +#define pevt_log_sh2_o(sh2, e) +#define pevt_dump() +#endif + // misc #ifdef _MSC_VER #define cdprintf