From 74cc7aebf6a63af0506d311353585329c00f616f Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 13 Apr 2020 22:20:13 +0200 Subject: [PATCH] sh2 timer optimization --- cpu/sh2/sh2.h | 1 + pico/32x/32x.c | 10 ++++++++-- pico/32x/memory.c | 14 +++++++------- pico/32x/sh2soc.c | 46 +++++++++++++++++++++++----------------------- pico/pico_int.h | 3 ++- 5 files changed, 41 insertions(+), 33 deletions(-) diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 2f2dfd92..aabe45be 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -48,6 +48,7 @@ typedef struct SH2_ #define SH2_STATE_CPOLL (1 << 2) // polling comm regs #define SH2_STATE_VPOLL (1 << 3) // polling VDP #define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM +#define SH2_TIMER_RUN (1 << 8) // SOC WDT timer is running unsigned int state; uint32_t poll_addr; int poll_cycles; diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 0f0cc4f5..ddd03fa8 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -508,12 +508,18 @@ void sync_sh2s_normal(unsigned int m68k_target) now = ssh2.m68krcycles_done; } if (CYCLES_GT(now, timer_cycles+STEP_N)) { - p32x_timers_do(now - timer_cycles); + if (msh2.state & SH2_TIMER_RUN) + p32x_timer_do(&msh2, now - timer_cycles); + if (ssh2.state & SH2_TIMER_RUN) + p32x_timer_do(&ssh2, now - timer_cycles); timer_cycles = now; } } - p32x_timers_do(now - timer_cycles); + if (msh2.state & SH2_TIMER_RUN) + p32x_timer_do(&msh2, now - timer_cycles); + if (ssh2.state & SH2_TIMER_RUN) + p32x_timer_do(&ssh2, now - timer_cycles); timer_cycles = now; } pprof_end_sub(m68k); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index f4f0a18b..3f597288 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -111,7 +111,7 @@ void p32x_m68k_poll_event(u32 flags) m68k_poll.addr1 = m68k_poll.addr2 = m68k_poll.cnt = 0; } -static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) +void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { u32 cycles_done = sh2_cycles_done_t(sh2); @@ -275,7 +275,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2) d = (s16)sh2_poll_read(a, d, cycles, sh2); } - sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); DRC_RESTORE_SR(sh2); return d; @@ -296,7 +296,7 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2) ((u16)sh2_poll_read(a+2, d, cycles, sh2)); } - sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); DRC_RESTORE_SR(sh2); return d; @@ -735,7 +735,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04/2: // H count (often as comm too) - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); cycles = sh2_cycles_done_m68k(sh2); sh2s_sync_on_read(sh2, cycles); return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2); @@ -769,7 +769,7 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) case 0x2a/2: case 0x2c/2: case 0x2e/2: - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); cycles = sh2_cycles_done_m68k(sh2); sh2s_sync_on_read(sh2, cycles); return sh2_poll_read(a, r[a / 2], cycles, sh2); @@ -1456,7 +1456,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out_16to8; } @@ -1519,7 +1519,7 @@ static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out; } diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index cf11666d..8895d49b 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -209,6 +209,9 @@ void p32x_timers_recalc(void) // SH2 timer step for (i = 0; i < 2; i++) { + sh2s[i].state &= ~SH2_TIMER_RUN; + if (PREG8(sh2s[i].peri_regs, 0x80) & 0x20) // TME + sh2s[i].state |= SH2_TIMER_RUN; tmp = PREG8(sh2s[i].peri_regs, 0x80) & 7; // Sclk cycles per timer tick if (tmp) @@ -222,32 +225,29 @@ void p32x_timers_recalc(void) } } -void p32x_timers_do(unsigned int m68k_slice) +NOINLINE void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice) { unsigned int cycles = m68k_slice * 3; - int cnt, i; - - // WDT timers - for (i = 0; i < 2; i++) { - void *pregs = sh2s[i].peri_regs; - if (PREG8(pregs, 0x80) & 0x20) { // TME - timer_cycles[i] += cycles; - // cnt = timer_cycles[i] / timer_tick_cycles[i]; - cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32; - timer_cycles[i] -= timer_tick_cycles[i] * cnt; - if (timer_cycles[i] > timer_tick_cycles[i]) - timer_cycles[i] -= timer_tick_cycles[i], cnt++; - cnt += PREG8(pregs, 0x81); - if (cnt >= 0x100) { - int level = PREG8(pregs, 0xe3) >> 4; - int vector = PREG8(pregs, 0xe4) & 0x7f; - elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)", - i ? 's' : 'm', level, vector); - sh2_internal_irq(&sh2s[i], level, vector); - cnt &= 0xff; - } - PREG8(pregs, 0x81) = cnt; + void *pregs = sh2->peri_regs; + int cnt; int i = sh2->is_slave; + + // WDT timer + timer_cycles[i] += cycles; + if (timer_cycles[i] > timer_tick_cycles[i]) { + // cnt = timer_cycles[i] / timer_tick_cycles[i]; + cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32; + timer_cycles[i] -= timer_tick_cycles[i] * cnt; + + cnt += PREG8(pregs, 0x81); + if (cnt >= 0x100) { + int level = PREG8(pregs, 0xe3) >> 4; + int vector = PREG8(pregs, 0xe4) & 0x7f; + elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)", + i ? 's' : 'm', level, vector); + sh2_internal_irq(sh2, level, vector); + cnt &= 0xff; } + PREG8(pregs, 0x81) = cnt; } } diff --git a/pico/pico_int.h b/pico/pico_int.h index 5fed483d..e4bd4c1e 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -977,6 +977,7 @@ unsigned int REGPARM(3) p32x_sh2_poll_memory8(unsigned int a, unsigned int d, SH unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, SH2 *sh2); unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2); void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2); +void p32x_sh2_poll_detect(unsigned int a, SH2 *sh2, unsigned int flags, int maxcnt); void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); int p32x_sh2_memcpy(unsigned int dst, unsigned int src, int count, int size, SH2 *sh2); @@ -1012,7 +1013,7 @@ void p32x_pwm_state_loaded(void); void p32x_dreq0_trigger(void); void p32x_dreq1_trigger(void); void p32x_timers_recalc(void); -void p32x_timers_do(unsigned int m68k_slice); +void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice); void sh2_peripheral_reset(SH2 *sh2); unsigned int REGPARM(2) sh2_peripheral_read8(unsigned int a, SH2 *sh2); unsigned int REGPARM(2) sh2_peripheral_read16(unsigned int a, SH2 *sh2); -- 2.39.2