From 1891e649e58c7a4499649cc29aded5da7713d4c2 Mon Sep 17 00:00:00 2001 From: kub Date: Tue, 25 Jun 2019 20:15:48 +0200 Subject: [PATCH] 32X: memory access and polling bug fixes --- pico/32x/memory.c | 36 ++++++++++++++++++++++-------------- pico/32x/memory_arm.S | 14 ++++++-------- pico/32x/sh2soc.c | 6 +++++- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 70287a2c..7148d41c 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -74,7 +74,7 @@ static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) if (match && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) { // detect split 32bit access by same cycle count, and ignore those - if (cycles != m68k_poll.cycles && ++m68k_poll.cnt > POLL_THRESHOLD) { + if (cycles != m68k_poll.cycles && ++m68k_poll.cnt >= POLL_THRESHOLD) { if (!(Pico32x.emu_flags & flags)) { elprintf(EL_32X, "m68k poll addr %08x, cyc %u", a, cycles - m68k_poll.cycles); @@ -118,7 +118,7 @@ static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) // by checking address (max 2 bytes away) and cycles (max 2 cycles later). // no polling if more than 20 cycles have passed since last detect call. 
if (a - sh2->poll_addr <= 2 && CYCLES_GE(sh2->poll_cycles+20, cycles_done)) { - if (CYCLES_GT(cycles_done,sh2->poll_cycles+2) && ++sh2->poll_cnt > maxcnt) { + if (CYCLES_GT(cycles_done,sh2->poll_cycles+2) && ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); @@ -131,6 +131,8 @@ static void NOINLINE sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) if ((a & 0xc6000000) == 0x06000000) { unsigned char *p = sh2->p_drcblk_ram; p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; + // mark next word too to enable poll fifo for 32bit access + p[((a+2) & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; } #endif } @@ -148,7 +150,7 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state & ~flags); - if (sh2->m68krcycles_done < m68k_cycles) + if (sh2->m68krcycles_done < m68k_cycles && !(sh2->state & SH2_STATE_RUN)) sh2->m68krcycles_done = m68k_cycles; pevt_log_sh2_o(sh2, EVT_POLL_END); @@ -174,12 +176,12 @@ static void sh2s_sync_on_read(SH2 *sh2) // This is used to correctly deliver syncronisation data to the 3 cpus. The // fifo stores 16 bit values, 8/32 bit accesses must be adapted accordingly. #define PFIFO_SZ 4 -#define PFIFO_CNT 4 +#define PFIFO_CNT 8 struct sh2_poll_fifo { u32 cycles; u32 a; u16 d; - u16 cpu; + int cpu; } sh2_poll_fifo[PFIFO_CNT][PFIFO_SZ]; unsigned sh2_poll_rd[PFIFO_CNT], sh2_poll_wr[PFIFO_CNT]; // ringbuffer pointers @@ -191,6 +193,7 @@ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) int cpu = sh2 ? 
sh2->is_slave+1 : 0; unsigned idx; + a &= ~0x20000000; // ignore writethrough bit // fetch oldest write to address from fifo, but stop when reaching the present idx = sh2_poll_rd[hix]; while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) { @@ -225,6 +228,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; int cpu = sh2 ? sh2->is_slave+1 : 0; + a &= ~0x20000000; // ignore writethrough bit // fold 2 consecutive writes to the same address to avoid reading of // intermediate values that may cause synchronisation problems. // NB this can take an eternity on m68k: mov.b , needs @@ -279,8 +283,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2); // check poll fifo and sign-extend the result correctly - d = sh2_poll_read(a, d, cycles, sh2) | - (sh2_poll_read(a+2, d >> 16, cycles, sh2) << 16); + d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) | + ((u16)sh2_poll_read(a+2, d, cycles, sh2)); } sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 5); @@ -1503,7 +1507,7 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) // writes #ifdef DRC_SH2 -static void NOINLINE sh2_sdram_poll(u32 a, u16 d, SH2 *sh2) +static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) { unsigned cycles; @@ -1525,8 +1529,8 @@ void NOINLINE sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, int t) void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, int t) { - sh2_sdram_checks(a, d, sh2, t); - sh2_sdram_checks(a+2, d>>16, sh2, t>>16); + sh2_sdram_checks(a, d>>16, sh2, t); + sh2_sdram_checks(a+2, d, sh2, t>>16); } #ifndef _ASM_32X_MEMORY_C @@ -1568,6 +1572,7 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) } if ((a & 0x3fe00) == 0x4200) { + sh2->poll_cnt = 0; ((u8 *)Pico32xMem->pal)[(a & 0x1ff) ^ 1] = d; Pico32x.dirty_pal = 1; goto out; @@ -1641,6 +1646,7 @@ static void REGPARM(3) 
sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) } if ((a & 0x3fe00) == 0x4200) { + sh2->poll_cnt = 0; Pico32xMem->pal[(a & 0x1ff) / 2] = d; Pico32x.dirty_pal = 1; goto out; @@ -2175,11 +2181,7 @@ void Pico32xSwapDRAM(int b) ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); // convenience ptrs - msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram; msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; - msh2.p_rom = ssh2.p_rom = Pico.rom; - msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array; - ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array; } static void bank_switch_rom_sh2(void) @@ -2359,6 +2361,12 @@ void PicoMemSetup32x(void) ssh2.write16_tab = (const void **)(void *)ssh2_write16_map; ssh2.write32_tab = (const void **)(void *)ssh2_write32_map; + // convenience ptrs + msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram; + msh2.p_rom = ssh2.p_rom = Pico.rom; + msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array; + ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array; + sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 48143ba9..43a01958 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -18,7 +18,7 @@ .text -@ u32 a +@ u32 a, SH2 *sh2 .global sh2_read8_rom .global sh2_read8_sdram .global sh2_read8_da @@ -32,7 +32,7 @@ .global sh2_read32_da .global sh2_read32_dram -@ u32 a, u32 d +@ u32 a, u32 d, SH2 *sh2 .global sh2_write8_sdram .global sh2_write8_da .global sh2_write8_dram @@ -270,16 +270,14 @@ sh2_write32_dram: streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] bxeq lr ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] - mov r2, #0 tst r1, #0x00ff0000 - orrne r2, r2, #0x00ff0000 + bicne r0, r0, #0x00ff0000 tst r1, #0xff000000 - orrne r2, r2, #0xff000000 + bicne r0, r0, #0xff000000 tst r1, #0x000000ff - orrne r2, r2, #0x000000ff + bicne r0, r0, #0x000000ff tst r1, #0x0000ff00 - orrne r2, r2, #0x0000ff00 - bic r0, r0, r2 
+ bicne r0, r0, #0x0000ff00 orr r0, r0, r1 str r0, [ip, r3, lsr #SH2_DRAM_SHIFT] bx lr diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 66bdc478..1f19150e 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -137,6 +137,11 @@ static void dmac_memcpy(struct dma_chan *chan, SH2 *sh2) if (!up || chan->tcr < 4) return; + // XXX Mars Check Program fills a 64K buffer, then copies 32K longwords from + // DRAM to SDRAM in 4-longword mode, which is 128K. This overwrites a comm + // area in SDRAM, which is why the check fails. + // Is this a buswidth mismatch problem? As a kludge, use 16-bit width xfers + if (size == 3 && (chan->sar & 0xdf000000) == 0x04000000) size = 1; if (size == 3) size = 2; // 4-word xfer mode still counts in words // XXX check TCR being a multiple of 4 in 4-word xfer mode? // XXX check alignment of sar/dar, generating a bus error if unaligned? @@ -500,7 +505,6 @@ static void dreq1_do(SH2 *sh2, struct dma_chan *chan) if ((chan->dar & ~0xf) != 0x20004030) elprintf(EL_32XP|EL_ANOMALY, "dreq1: bad dar?: %08x\n", chan->dar); - sh2->state |= SH2_STATE_SLEEP; dmac_transfer_one(sh2, chan); if (chan->tcr == 0) dmac_transfer_complete(sh2, chan); -- 2.39.2