From fe344bd3d8ab717452ae54eea8b2fdfc91e79fda Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 31 Aug 2019 17:37:18 +0200 Subject: [PATCH] cleanup and microoptimizations in SH2 hw handling --- Makefile | 9 +- README.md | 4 +- cpu/drc/emit_arm64.c | 3 +- cpu/drc/emit_mips.c | 4 +- cpu/drc/emit_x86.c | 2 +- pico/32x/32x.c | 2 +- pico/32x/memory.c | 309 +++++++++++++++++++++++++----------------- pico/32x/memory_arm.S | 2 + pico/32x/pwm.c | 184 +++++++++++++------------ pico/32x/sh2soc.c | 34 ++--- pico/draw.c | 2 - pico/pico_int.h | 2 +- 12 files changed, 309 insertions(+), 248 deletions(-) diff --git a/Makefile b/Makefile index 88b9238f..47463d51 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,6 @@ CFLAGS += -I. ifeq "$(DEBUG)" "0" CFLAGS += -O3 -DNDEBUG endif -ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) -# very small caches, avoid optimization options making the binary much bigger -CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp-clone # -fno-ipa-cp -endif # This is actually needed, bevieve me. # If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. 
@@ -38,6 +34,11 @@ else # NO_CONFIG_MAK config.mak: endif +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) +# very small caches, avoid optimization options making the binary much bigger +CFLAGS += -finline-limit=42 -fno-unroll-loops -fno-ipa-cp -fno-common -fno-stack-protector -ffast-math +endif + # default settings ifeq "$(ARCH)" "arm" use_cyclone ?= 1 diff --git a/README.md b/README.md index d7798231..8154f7dc 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,8 @@ assuming $TC points to the appropriate cross compile toolchain directory: platform|toolchain|configure command --------|---------|----------------- -gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x -gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include -fno-stack-protector -fno-common" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x|CROSS_COMPILE=arm-open2x-linux- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include" LDFLAGS="--sysroot $TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x +gp2x,wiz,caanoo|open2x with ubuntu arm gcc 4.7|CROSS_COMPILE=arm-linux-gnueabi- CFLAGS="-I$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/include" LDFLAGS="-B$TC/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1 -B$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib -L$TC/gcc-4.1.1-glibc-2.3.6/arm-open2x-linux/lib" ./configure --platform=gp2x opendingux|opendingux|CROSS_COMPILE=mipsel-linux- CFLAGS="-I$TC/usr/include 
-I$TC/usr/include/SDL" LDFLAGS="--sysroot $TC -L$TC/lib" ./configure --platform=opendingux opendingux|opendingux with ubuntu mips gcc 5.4|CROSS_COMPILE=mipsel-linux-gnu- CFLAGS="-I$TC/usr/include -I$TC/usr/include/SDL" LDFLAGS="-B$TC/usr/lib -B$TC/lib -Wl,-rpath-link=$TC/usr/lib -Wl,-rpath-link=$TC/lib" ./configure --platform=opendingux gcw0|gcw0|CROSS_COMPILE=mipsel-gcw0-linux-uclibc- CFLAGS="-I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include -I$TC/usr/mipsel-gcw0-linux-uclibc/sysroot/usr/include/SDL" LDFLAGS="--sysroot $TC/usr/mipsel-gcw0-linux-uclibc/sysroot" ./configure --platform=gcw0 diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index a67f6819..de587619 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1163,9 +1163,10 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) int t2 = rcache_get_tmp(); \ int t3 = rcache_get_tmp(); \ /* if (sr < 0) return */ \ - emith_asrf(t2, sr, 12); \ + emith_cmp_r_imm(sr, 0); \ EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 91d493b5..e200db0a 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -560,8 +560,8 @@ static void emith_set_arith_flags(int rd, int rt, int rs, s32 imm, int sub) #define emith_adc_r_r(d, s) \ emith_adc_r_r_r(d, d, s) -// NB: the incoming C can cause its own outgoing C if s2+C=0 (or s1+C=0 FWIW) -// moreover, s2 is 0 if there is C, so no other C can be generated. 
+// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout #define emith_adcf_r_r_r(d, s1, s2) do { \ emith_add_r_r_r(FNZ, s2, FC); \ EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 62288ff5..d515cd23 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1,7 +1,7 @@ /* * Basic macros to emit x86 instructions and some utils * Copyright (C) 2008,2009,2010 notaz - * Copyright (C) 2019 kuv + * Copyright (C) 2019 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 1511f3f7..e9d8ff6d 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -426,7 +426,7 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) } #define STEP_LS 24 -#define STEP_N 440 +#define STEP_N 488 // one line #define sync_sh2s_normal p32x_sync_sh2s //#define sync_sh2s_lockstep p32x_sync_sh2s diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 7f494e7a..e139910a 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -220,7 +220,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) { int hix = (a >> 1) % PFIFO_CNT; struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; - struct sh2_poll_fifo *q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; + struct sh2_poll_fifo *q; int cpu = sh2 ? 
sh2->is_slave : -1; unsigned rd = sh2_poll_rd[hix], wr = sh2_poll_wr[hix]; unsigned idx, nrd; @@ -230,8 +230,9 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // throw out any values written by other cpus, plus heading cancelled stuff for (idx = nrd = wr; idx != rd; ) { idx = (idx-1) % PFIFO_SZ; - if (fifo[idx].a == a && fifo[idx].cpu != cpu) { fifo[idx].a = -1; } - if (fifo[idx].a != -1) { nrd = idx; } + q = &fifo[idx]; + if (q->cpu != cpu && q->a == a) { q->a = -1; } + if (q->a != -1) { nrd = idx; } } rd = nrd; @@ -239,7 +240,8 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) // intermediate values that may cause synchronisation problems. // NB this can take an eternity on m68k: mov.b , needs // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) - if (q->a == a && rd != wr && !CYCLES_GT(cycles,q->cycles+30)) { + q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; + if (rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles+30)) { q->d = d; } else { // store write to poll address in fifo @@ -493,6 +495,35 @@ static void p32x_reg_write8(u32 a, u32 d) case 0x1d: case 0x1e: case 0x1f: + return; + case 0x20: // comm port + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + if (REG8IN16(r, a) != d) { + int cycles = SekCyclesDone(); + + if (cycles - (int)msh2.m68krcycles_done > 30) + p32x_sync_sh2s(cycles); + + REG8IN16(r, a) = d; + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); + } + return; case 0x30: return; case 0x31: // PWM control @@ -532,22 +563,6 @@ static void p32x_reg_write8(u32 a, u32 d) p32x_pwm_write16(a & ~1, d, NULL, SekCyclesDone()); return; } - - if ((a & 0x30) == 0x20) { - int cycles = SekCyclesDone(); - - if 
(REG8IN16(r, a) == d) - return; - - if (cycles - (int)msh2.m68krcycles_done > 30) - p32x_sync_sh2s(cycles); - - REG8IN16(r, a) = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); - return; - } } static void p32x_reg_write16(u32 a, u32 d) @@ -558,61 +573,68 @@ static void p32x_reg_write16(u32 a, u32 d) // for things like bset on comm port m68k_poll.cnt = 0; - switch (a) { - case 0x00: // adapter ctl + switch (a/2) { + case 0x00/2: // adapter ctl if ((d ^ r[0]) & d & P32XS_nRES) p32x_reset_sh2s(); r[0] &= ~(P32XS_FM|P32XS_nRES|P32XS_ADEN); r[0] |= d & (P32XS_FM|P32XS_nRES|P32XS_ADEN); return; - case 0x08: // DREQ src + case 0x08/2: // DREQ src r[a / 2] = d & 0xff; return; - case 0x0a: + case 0x0a/2: r[a / 2] = d & ~1; return; - case 0x0c: // DREQ dest + case 0x0c/2: // DREQ dest r[a / 2] = d & 0xff; return; - case 0x0e: + case 0x0e/2: r[a / 2] = d; return; - case 0x10: // DREQ len + case 0x10/2: // DREQ len r[a / 2] = d & ~3; return; - case 0x12: // FIFO reg + case 0x12/2: // FIFO reg dreq0_write(r, d); return; - case 0x1a: // TV + mystery bit + case 0x1a/2: // TV + mystery bit r[a / 2] = d & 0x0101; return; - case 0x30: // PWM control + case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + if (r[a / 2] != d) { + int cycles = SekCyclesDone(); + + if (cycles - (int)msh2.m68krcycles_done > 30) + p32x_sync_sh2s(cycles); + + r[a / 2] = d; + p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, (u16)d, cycles, NULL); + } + return; + case 0x30/2: // PWM control d = (r[a / 2] & ~0x0f) | (d & 0x0f); r[a / 2] = d; p32x_pwm_write16(a, d, NULL, SekCyclesDone()); return; - } - - // comm port - if ((a & 0x30) == 0x20) { - int cycles = SekCyclesDone(); - - if (r[a / 2] == d) - return; - - if (cycles - 
(int)msh2.m68krcycles_done > 30) - p32x_sync_sh2s(cycles); - - r[a / 2] = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - sh2_poll_write(a, (u16)d, cycles, NULL); - return; - } - // PWM - else if ((a & 0x30) == 0x30) { - p32x_pwm_write16(a, d, NULL, SekCyclesDone()); - return; + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + p32x_pwm_write16(a, d, NULL, SekCyclesDone()); + return; } p32x_reg_write8(a + 1, d); @@ -709,23 +731,23 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) u16 *r = Pico32x.regs; a &= 0x3e; - switch (a) { - case 0x00: // adapter/irq ctl + switch (a/2) { + case 0x00/2: // adapter/irq ctl return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; - case 0x04: // H count (often as comm too) + case 0x04/2: // H count (often as comm too) sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); sh2s_sync_on_read(sh2); return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2); - case 0x06: + case 0x06/2: return (r[a / 2] & ~P32XS_FULL) | 0x4000; - case 0x08: // DREQ src - case 0x0a: - case 0x0c: // DREQ dst - case 0x0e: - case 0x10: // DREQ len + case 0x08/2: // DREQ src + case 0x0a/2: + case 0x0c/2: // DREQ dst + case 0x0e/2: + case 0x10/2: // DREQ len return r[a / 2]; - case 0x12: // DREQ FIFO - does this work on hw? + case 0x12/2: // DREQ FIFO - does this work on hw? if (Pico32x.dmac0_fifo_ptr > 0) { Pico32x.dmac0_fifo_ptr--; r[a / 2] = Pico32x.dmac_fifo[0]; @@ -733,23 +755,34 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) Pico32x.dmac0_fifo_ptr * 2); } return r[a / 2]; - case 0x14: - case 0x16: - case 0x18: - case 0x1a: - case 0x1c: + case 0x14/2: + case 0x16/2: + case 0x18/2: + case 0x1a/2: + case 0x1c/2: return 0; // ? 
+ case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + sh2s_sync_on_read(sh2); + return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); + case 0x30/2: // PWM + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); } - // comm port - if ((a & 0x30) == 0x20) { - sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); - sh2s_sync_on_read(sh2); - return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); - } - if ((a & 0x30) == 0x30) - return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); - elprintf_sh2(sh2, EL_32X|EL_ANOMALY, "unhandled sysreg r16 [%02x] @%08x", a, sh2_pc(sh2)); return 0; @@ -796,6 +829,32 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) sh2_poll_write(a & ~1, d, cycles, sh2); } return; + case 0x20: // comm port + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + if (REG8IN16(r, a) != d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); + + REG8IN16(r, a) = d; + sh2_end_run(sh2, 1); + p32x_m68k_poll_event(P32XF_68KCPOLL); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); + } + return; case 0x30: REG8IN16(r, a) = d & 0x0f; d = r[0x30 / 2]; @@ -837,20 +896,6 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) return; } - if ((a & 0x30) == 0x20) { - unsigned int cycles; - if (REG8IN16(r, a) == d) - return; - - REG8IN16(r, a) = d; - cycles = sh2_cycles_done_m68k(sh2); - sh2_end_run(sh2, 1); - p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); - return; - } - elprintf(EL_32X|EL_ANOMALY, 
"unhandled sysreg w8 [%02x] %02x @%08x", a, d, sh2_pc(sh2)); } @@ -861,49 +906,57 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) sh2->poll_cnt = 0; - // comm - if ((a & 0x30) == 0x20) { - unsigned int cycles; - if (Pico32x.regs[a / 2] == d) - return; - - Pico32x.regs[a / 2] = d; - cycles = sh2_cycles_done_m68k(sh2); - sh2_end_run(sh2, 1); - p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_poll_write(a, d, cycles, sh2); - return; - } - // PWM - else if ((a & 0x30) == 0x30) { - p32x_pwm_write16(a, d, sh2, sh2_cycles_done_m68k(sh2)); - return; - } - - switch (a) { - case 0: // FM + switch (a/2) { + case 0x00/2: // FM Pico32x.regs[0] &= ~P32XS_FM; Pico32x.regs[0] |= d & P32XS_FM; break; - case 0x14: + case 0x14/2: Pico32x.sh2irqs &= ~P32XI_VRES; goto irls; - case 0x16: + case 0x16/2: Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_VINT; goto irls; - case 0x18: + case 0x18/2: Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_HINT; goto irls; - case 0x1a: + case 0x1a/2: Pico32x.regs[2 / 2] &= ~(1 << sh2->is_slave); p32x_update_cmd_irq(sh2, 0); return; - case 0x1c: + case 0x1c/2: p32x_pwm_sync_to_sh2(sh2); Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_PWM; p32x_pwm_schedule_sh2(sh2); goto irls; + case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + if (Pico32x.regs[a / 2] != d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); + + Pico32x.regs[a / 2] = d; + sh2_end_run(sh2, 1); + p32x_m68k_poll_event(P32XF_68KCPOLL); + p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, d, cycles, sh2); + } + return; + case 0x30/2: // PWM + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + p32x_pwm_write16(a, d, sh2, sh2_cycles_done_m68k(sh2)); + return; } p32x_sh2reg_write8(a | 1, d, sh2); @@ -1391,7 +1444,7 @@ static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) 
sh2_burn_cycles(sh2, 1*2); - // 0x3ffc0 is veridied + // 0x3ffc0 is verified if ((a & 0x3ffc0) == 0x4000) { d = p32x_sh2reg_read16(a, sh2); goto out_16to8; @@ -1573,6 +1626,11 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) elprintf_sh2(sh2, EL_32X, "w8 [%08x] %02x @%06x", a, d & 0xff, sh2_pc(sh2)); + if ((a & 0x3ffc0) == 0x4000) { + p32x_sh2reg_write8(a, d, sh2); + goto out; + } + if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { sh2->poll_cnt = 0; @@ -1588,11 +1646,6 @@ static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) } } - if ((a & 0x3ffc0) == 0x4000) { - p32x_sh2reg_write8(a, d, sh2); - goto out; - } - sh2_write8_unmapped(a, d, sh2); out: DRC_RESTORE_SR(sh2); @@ -1647,6 +1700,11 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) elprintf_sh2(sh2, EL_32X, "w16 [%08x] %04x @%06x", a, d & 0xffff, sh2_pc(sh2)); + if ((a & 0x3ffc0) == 0x4000) { + p32x_sh2reg_write16(a, d, sh2); + goto out; + } + if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { sh2->poll_cnt = 0; @@ -1662,11 +1720,6 @@ static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) } } - if ((a & 0x3ffc0) == 0x4000) { - p32x_sh2reg_write16(a, d, sh2); - goto out; - } - sh2_write16_unmapped(a, d, sh2); out: DRC_RESTORE_SR(sh2); diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index 43a01958..ba83a6bf 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -18,6 +18,7 @@ .text +#if 0 @ u32 a, SH2 *sh2 .global sh2_read8_rom .global sh2_read8_sdram @@ -31,6 +32,7 @@ .global sh2_read32_sdram .global sh2_read32_da .global sh2_read32_dram +#endif @ u32 a, u32 d, SH2 *sh2 .global sh2_write8_sdram diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 50735642..1c1ec428 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -7,12 +7,15 @@ */ #include "../pico_int.h" -static int pwm_cycles; -static int pwm_mult; -static int pwm_ptr; -static int pwm_irq_reload; -static int pwm_doing_fifo; -static int pwm_silent; +static 
struct { + int cycles; + int mult; + int ptr; + int irq_reload; + int doing_fifo; + int silent; + short current[2]; +} pwm; void p32x_pwm_ctl_changed(void) { @@ -20,19 +23,19 @@ void p32x_pwm_ctl_changed(void) int cycles = Pico32x.regs[0x32 / 2]; cycles = (cycles - 1) & 0x0fff; - pwm_cycles = cycles; + pwm.cycles = cycles; // supposedly we should stop FIFO when xMd is 0, // but mars test disagrees - pwm_mult = 0; + pwm.mult = 0; if ((control & 0x0f) != 0) - pwm_mult = 0x10000 / cycles; + pwm.mult = 0x10000 / cycles; - pwm_irq_reload = (control & 0x0f00) >> 8; - pwm_irq_reload = ((pwm_irq_reload - 1) & 0x0f) + 1; + pwm.irq_reload = (control & 0x0f00) >> 8; + pwm.irq_reload = ((pwm.irq_reload - 1) & 0x0f) + 1; if (Pico32x.pwm_irq_cnt == 0) - Pico32x.pwm_irq_cnt = pwm_irq_reload; + Pico32x.pwm_irq_cnt = pwm.irq_reload; } static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) @@ -40,7 +43,7 @@ static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) p32x_trigger_irq(sh2, m68k_cycles, P32XI_PWM); if (Pico32x.regs[0x30 / 2] & P32XP_RTP) { - p32x_event_schedule(m68k_cycles, P32X_EVENT_PWM, pwm_cycles / 3 + 1); + p32x_event_schedule(m68k_cycles, P32X_EVENT_PWM, pwm.cycles / 3 + 1); // note: might recurse p32x_dreq1_trigger(); } @@ -50,14 +53,14 @@ static int convert_sample(unsigned int v) { if (v == 0) return 0; - if (v > pwm_cycles) - v = pwm_cycles; - return ((int)v - pwm_cycles / 2) * pwm_mult; + if (v > pwm.cycles) + v = pwm.cycles; + return (v * 2 - pwm.cycles) / 2 * pwm.mult; } #define consume_fifo(sh2, m68k_cycles) { \ int cycles_diff = ((m68k_cycles) * 3) - Pico32x.pwm_cycle_p; \ - if (cycles_diff >= pwm_cycles) \ + if (cycles_diff >= pwm.cycles) \ consume_fifo_do(sh2, m68k_cycles, cycles_diff); \ } @@ -69,67 +72,63 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, unsigned short *fifo_r = mem->pwm_fifo[1]; int sum = 0; - if (pwm_cycles == 0 || pwm_doing_fifo) + if (pwm.cycles == 0 || pwm.doing_fifo) return; elprintf(EL_PWM, "pwm: %u: consume 
%d/%d, %d,%d ptr %d", - m68k_cycles, sh2_cycles_diff, sh2_cycles_diff / pwm_cycles, - Pico32x.pwm_p[0], Pico32x.pwm_p[1], pwm_ptr); + m68k_cycles, sh2_cycles_diff, sh2_cycles_diff / pwm.cycles, + Pico32x.pwm_p[0], Pico32x.pwm_p[1], pwm.ptr); // this is for recursion from dreq1 writes - pwm_doing_fifo = 1; + pwm.doing_fifo = 1; - for (; sh2_cycles_diff >= pwm_cycles; sh2_cycles_diff -= pwm_cycles) + for (; sh2_cycles_diff >= pwm.cycles; sh2_cycles_diff -= pwm.cycles) { if (Pico32x.pwm_p[0] > 0) { - fifo_l[0] = fifo_l[1]; - fifo_l[1] = fifo_l[2]; - fifo_l[2] = fifo_l[3]; + mem->pwm_index[0] = (mem->pwm_index[0]+1) % 4; Pico32x.pwm_p[0]--; - mem->pwm_current[0] = convert_sample(fifo_l[0]); - sum += mem->pwm_current[0]; + pwm.current[0] = convert_sample(fifo_l[mem->pwm_index[0]]); + sum |=pwm.current[0]; } if (Pico32x.pwm_p[1] > 0) { - fifo_r[0] = fifo_r[1]; - fifo_r[1] = fifo_r[2]; - fifo_r[2] = fifo_r[3]; + mem->pwm_index[1] = (mem->pwm_index[1]+1) % 4; Pico32x.pwm_p[1]--; - mem->pwm_current[1] = convert_sample(fifo_r[0]); - sum += mem->pwm_current[1]; + pwm.current[1] = convert_sample(fifo_r[mem->pwm_index[1]]); + sum |= pwm.current[1]; } - mem->pwm[pwm_ptr * 2 ] = mem->pwm_current[0]; - mem->pwm[pwm_ptr * 2 + 1] = mem->pwm_current[1]; - pwm_ptr = (pwm_ptr + 1) & (PWM_BUFF_LEN - 1); + mem->pwm[pwm.ptr * 2 ] = pwm.current[0]; + mem->pwm[pwm.ptr * 2 + 1] = pwm.current[1]; + pwm.ptr = (pwm.ptr + 1) & (PWM_BUFF_LEN - 1); if (--Pico32x.pwm_irq_cnt == 0) { - Pico32x.pwm_irq_cnt = pwm_irq_reload; + Pico32x.pwm_irq_cnt = pwm.irq_reload; do_pwm_irq(sh2, m68k_cycles); } } Pico32x.pwm_cycle_p = m68k_cycles * 3 - sh2_cycles_diff; - pwm_doing_fifo = 0; + pwm.doing_fifo = 0; if (sum != 0) - pwm_silent = 0; + pwm.silent = 0; } static int p32x_pwm_schedule_(SH2 *sh2, unsigned int m68k_now) { - unsigned int sh2_now = m68k_now * 3; + unsigned int pwm_now = m68k_now * 3; int cycles_diff_sh2; - if (pwm_cycles == 0) + if (pwm.cycles == 0) return 0; - cycles_diff_sh2 = sh2_now - 
Pico32x.pwm_cycle_p; - if (cycles_diff_sh2 >= pwm_cycles) + cycles_diff_sh2 = pwm_now - Pico32x.pwm_cycle_p; + if (cycles_diff_sh2 >= pwm.cycles) consume_fifo_do(sh2, m68k_now, cycles_diff_sh2); if (!((Pico32x.sh2irq_mask[0] | Pico32x.sh2irq_mask[1]) & 1)) return 0; // masked by everyone - cycles_diff_sh2 = sh2_now - Pico32x.pwm_cycle_p; - return (Pico32x.pwm_irq_cnt * pwm_cycles + cycles_diff_sh2 = pwm_now - Pico32x.pwm_cycle_p; + return (Pico32x.pwm_irq_cnt * pwm.cycles - cycles_diff_sh2) / 3 + 1; } @@ -166,21 +165,21 @@ unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, consume_fifo(sh2, m68k_cycles); a &= 0x0e; - switch (a) { - case 0: // control - case 2: // cycle + switch (a/2) { + case 0/2: // control + case 2/2: // cycle d = Pico32x.regs[(0x30 + a) / 2]; break; - case 4: // L ch + case 4/2: // L ch if (Pico32x.pwm_p[0] == 3) d |= P32XP_FULL; else if (Pico32x.pwm_p[0] == 0) d |= P32XP_EMPTY; break; - case 6: // R ch - case 8: // MONO + case 6/2: // R ch + case 8/2: // MONO if (Pico32x.pwm_p[1] == 3) d |= P32XP_FULL; else if (Pico32x.pwm_p[1] == 0) @@ -196,47 +195,53 @@ unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, void p32x_pwm_write16(unsigned int a, unsigned int d, SH2 *sh2, unsigned int m68k_cycles) { + unsigned short *fifo; + int idx; + elprintf(EL_PWM, "pwm: %u: w16 %02x %04x (p %d %d)", m68k_cycles, a & 0x0e, d, Pico32x.pwm_p[0], Pico32x.pwm_p[1]); consume_fifo(sh2, m68k_cycles); a &= 0x0e; - if (a == 0) { // control - // avoiding pops.. - if ((Pico32x.regs[0x30 / 2] & 0x0f) == 0) - Pico32xMem->pwm_fifo[0][0] = Pico32xMem->pwm_fifo[1][0] = 0; - Pico32x.regs[0x30 / 2] = d; - p32x_pwm_ctl_changed(); - Pico32x.pwm_irq_cnt = pwm_irq_reload; // ? 
- } - else if (a == 2) { // cycle - Pico32x.regs[0x32 / 2] = d & 0x0fff; - p32x_pwm_ctl_changed(); - } - else if (a <= 8) { - d = (d - 1) & 0x0fff; - - if (a == 4 || a == 8) { // L ch or MONO - unsigned short *fifo = Pico32xMem->pwm_fifo[0]; - if (Pico32x.pwm_p[0] < 3) - Pico32x.pwm_p[0]++; - else { - fifo[1] = fifo[2]; - fifo[2] = fifo[3]; - } - fifo[Pico32x.pwm_p[0]] = d; - } - if (a == 6 || a == 8) { // R ch or MONO - unsigned short *fifo = Pico32xMem->pwm_fifo[1]; + switch (a/2) { + case 0/2: // control + // avoiding pops.. + if ((Pico32x.regs[0x30 / 2] & 0x0f) == 0) + Pico32xMem->pwm_fifo[0][0] = Pico32xMem->pwm_fifo[1][0] = 0; + Pico32x.regs[0x30 / 2] = d; + p32x_pwm_ctl_changed(); + Pico32x.pwm_irq_cnt = pwm.irq_reload; // ? + break; + case 2/2: // cycle + Pico32x.regs[0x32 / 2] = d & 0x0fff; + p32x_pwm_ctl_changed(); + break; + case 8/2: // MONO + case 6/2: // R ch + fifo = Pico32xMem->pwm_fifo[1]; + idx = Pico32xMem->pwm_index[1]; if (Pico32x.pwm_p[1] < 3) Pico32x.pwm_p[1]++; else { - fifo[1] = fifo[2]; - fifo[2] = fifo[3]; +// fifo[(idx+1) % 4] = fifo[idx]; + idx = (idx+1) % 4; + Pico32xMem->pwm_index[1] = idx; // store back to the R channel's own ring index } - fifo[Pico32x.pwm_p[1]] = d; - } + fifo[(idx+Pico32x.pwm_p[1]) % 4] = (d - 1) & 0x0fff; + if (a != 8) break; // fallthrough if MONO + case 4/2: // L ch + fifo = Pico32xMem->pwm_fifo[0]; + idx = Pico32xMem->pwm_index[0]; + if (Pico32x.pwm_p[0] < 3) + Pico32x.pwm_p[0]++; + else { +// fifo[(idx+1) % 4] = fifo[idx]; + idx = (idx+1) % 4; + Pico32xMem->pwm_index[0] = idx; + } + fifo[(idx+Pico32x.pwm_p[0]) % 4] = (d - 1) & 0x0fff; + break; } } @@ -252,10 +257,10 @@ void p32x_pwm_update(int *buf32, int length, int stereo) xmd = Pico32x.regs[0x30 / 2] & 0x0f; if (xmd == 0 || xmd == 0x06 || xmd == 0x09 || xmd == 0x0f) goto out; // invalid? 
- if (pwm_silent) + if (pwm.silent) return; - step = (pwm_ptr << 16) / length; + step = (pwm.ptr << 16) / length; pwmb = Pico32xMem->pwm; if (stereo) @@ -310,13 +315,12 @@ void p32x_pwm_update(int *buf32, int length, int stereo) } } - elprintf(EL_PWM, "pwm_update: pwm_ptr %d, len %d, step %04x, done %d", - pwm_ptr, length, step, (pwmb - Pico32xMem->pwm) / 2); + elprintf(EL_PWM, "pwm_update: pwm.ptr %d, len %d, step %04x, done %d", + pwm.ptr, length, step, (pwmb - Pico32xMem->pwm) / 2); out: - pwm_ptr = 0; - pwm_silent = Pico32xMem->pwm_current[0] == 0 - && Pico32xMem->pwm_current[1] == 0; + pwm.ptr = 0; + pwm.silent = pwm.current[0] == 0 && pwm.current[1] == 0; } void p32x_pwm_state_loaded(void) @@ -327,8 +331,8 @@ void p32x_pwm_state_loaded(void) // for old savestates cycles_diff_sh2 = Pico.t.m68c_cnt * 3 - Pico32x.pwm_cycle_p; - if (cycles_diff_sh2 >= pwm_cycles || cycles_diff_sh2 < 0) { - Pico32x.pwm_irq_cnt = pwm_irq_reload; + if (cycles_diff_sh2 >= pwm.cycles || cycles_diff_sh2 < 0) { + Pico32x.pwm_irq_cnt = pwm.irq_reload; Pico32x.pwm_cycle_p = Pico.t.m68c_cnt * 3; p32x_pwm_schedule(Pico.t.m68c_cnt); } diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 1f19150e..2b5a126c 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -399,6 +399,7 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) { u32 *r = sh2->peri_regs; u32 old; + struct dmac *dmac; elprintf_sh2(sh2, EL_32XP, "peri w32 [%08x] %08x @%06x", a, d, sh2_pc(sh2)); @@ -439,22 +440,23 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) else r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ? break; - } - - // perhaps starting a DMA? 
- if (a == 0x1b0 || a == 0x18c || a == 0x19c) { - struct dmac *dmac = (void *)&sh2->peri_regs[0x180 / 4]; - if (a == 0x1b0 && !((old ^ d) & d & DMA_DME)) - return; - if (!(dmac->dmaor & DMA_DME)) - return; - - DRC_SAVE_SR(sh2); - if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) - dmac_trigger(sh2, &dmac->chan[0]); - if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) - dmac_trigger(sh2, &dmac->chan[1]); - DRC_RESTORE_SR(sh2); + // perhaps starting a DMA? + case 0x18c: + case 0x19c: + case 0x1b0: + dmac = (void *)&sh2->peri_regs[0x180 / 4]; + if (a == 0x1b0 && !((old ^ d) & d & DMA_DME)) + return; + if (!(dmac->dmaor & DMA_DME)) + return; + + DRC_SAVE_SR(sh2); + if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) + dmac_trigger(sh2, &dmac->chan[0]); + if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) + dmac_trigger(sh2, &dmac->chan[1]); + DRC_RESTORE_SR(sh2); + break; } } diff --git a/pico/draw.c b/pico/draw.c index 0bf7c3de..7fd93f8e 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1347,8 +1347,6 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) *pd++ = pal[*ps++]; *pd++ = pal[*ps++]; } -// for (i = 0; i < len; i++) -// pd[i] = pal[ps[i]]; #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); diff --git a/pico/pico_int.h b/pico/pico_int.h index 36b36144..89acc4fb 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -630,8 +630,8 @@ struct Pico32xMem unsigned short pal[0x100]; unsigned short pal_native[0x100]; // converted to native (for renderer) signed short pwm[2*PWM_BUFF_LEN]; // PWM buffer for current frame - signed short pwm_current[2]; // current converted samples unsigned short pwm_fifo[2][4]; // [0] - current raw, others - fifo entries + unsigned pwm_index[2]; // ringbuffer index for pwm_fifo }; // area.c -- 2.39.2