From: kub Date: Tue, 11 Jul 2023 21:18:05 +0000 (+0000) Subject: core, improve 68k timing accuracy X-Git-Tag: v2.00~190 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7263343dc740ff930ae9880f5fed7a8e6689d787;p=picodrive.git core, improve 68k timing accuracy --- diff --git a/pico/32x/32x.c b/pico/32x/32x.c index 61da4816..d0d26f02 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -197,10 +197,6 @@ void p32x_reset_sh2s(void) void Pico32xInit(void) { - if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0) - Pico32xSetClocks(PICO_MSH2_HZ, 0); - if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0) - Pico32xSetClocks(0, PICO_MSH2_HZ); } void PicoPower32x(void) @@ -284,8 +280,11 @@ static void p32x_end_blank(void) Pico32x.vdp_regs[0x0a/2] &= ~P32XV_VBLK; // get out of vblank if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0) // no forced blanking Pico32x.vdp_regs[0x0a/2] &= ~P32XV_PEN; // no palette access - if (!(Pico32x.sh2_regs[0] & 0x80)) + if (!(Pico32x.sh2_regs[0] & 0x80)) { + // NB must precede VInt per hw manual, min 4 SH-2 cycles to pass Mars Check + Pico32x.hint_counter = -0x18; p32x_schedule_hint(NULL, Pico.t.m68c_aim); + } p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_STATE_VPOLL, Pico.t.m68c_aim); p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_STATE_VPOLL, Pico.t.m68c_aim); @@ -300,7 +299,9 @@ void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles) if (!(Pico32x.sh2_regs[0] & 0x80) && (Pico.video.status & PVS_VB2)) return; - after = (Pico32x.sh2_regs[4 / 2] + 1) * 488; + Pico32x.hint_counter += (Pico32x.sh2_regs[4 / 2] + 1) * (int)(488.5*0x10); + after = Pico32x.hint_counter >> 4; + Pico32x.hint_counter &= 0xf; if (sh2 != NULL) p32x_event_schedule_sh2(sh2, P32X_EVENT_HINT, after); else @@ -633,7 +634,8 @@ void Pico32xStateLoaded(int is_early) return; } - if (sh2s[0].m68krcycles_done == 0 && sh2s[1].m68krcycles_done == 0) + if (CYCLES_GE(sh2s[0].m68krcycles_done - Pico.t.m68c_aim, 500) || + CYCLES_GE(sh2s[1].m68krcycles_done - Pico.t.m68c_aim, 500)) sh2s[0].m68krcycles_done = sh2s[1].m68krcycles_done = SekCyclesDone(); p32x_update_irls(NULL, SekCyclesDone()); p32x_timers_recalc(); @@ -643,6 +645,11 @@ void Pico32xStateLoaded(int is_early) void Pico32xPrepare(void) { + if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0) + Pico32xSetClocks(PICO_MSH2_HZ, 0); + if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0) + Pico32xSetClocks(0, PICO_MSH2_HZ); + sh2_execute_prepare(&msh2, PicoIn.opt & POPT_EN_DRC); sh2_execute_prepare(&ssh2, PicoIn.opt & POPT_EN_DRC); } diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index c9a4e6a9..44376322 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -123,18 +123,13 @@ static void SekRunS68k(unsigned int to) pprof_end(s68k); } -static void pcd_set_cycle_mult(void) +void PicoMCDPrepare(void) { - unsigned int div; - - if (Pico.m.pal) - div = 50*313*488; - else - div = 60*262*488; - - // ~1.63 for NTSC, ~1.645 for PAL; round to nearest, x/y+0.5 -> (x+y/2)/y - mcd_m68k_cycle_mult = ((12500000ull << 16) + div/2) / div; - mcd_s68k_cycle_mult = ((1ull*div << 16) + 6250000) / 12500000; + // ~1.63 for NTSC, ~1.645 for PAL +#define DIV_ROUND(x,y) ((x)+(y)/2) / (y) // round to nearest, x/y+0.5 -> (x+y/2)/y + unsigned int osc = (Pico.m.pal ? OSC_PAL : OSC_NTSC); + mcd_m68k_cycle_mult = DIV_ROUND(12500000ull << 16, osc / 7); + mcd_s68k_cycle_mult = DIV_ROUND(1ull * osc << 16, 7 * 12500000); } unsigned int pcd_cycles_m68k_to_s68k(unsigned int c) @@ -312,11 +307,13 @@ int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync) #define pcd_run_cpus_normal pcd_run_cpus //#define pcd_run_cpus_lockstep pcd_run_cpus +static void SekAimM68k(int cyc, int mult); static int SekSyncM68k(int once); void pcd_run_cpus_normal(int m68k_cycles) { - Pico.t.m68c_aim += m68k_cycles; + // TODO this is suspicious. ~1 cycle refresh delay every 256 cycles? + SekAimM68k(m68k_cycles, 0x43); // Fhey area while (CYCLES_GT(Pico.t.m68c_aim, Pico.t.m68c_cnt)) { if (SekShouldInterrupt()) { @@ -376,8 +373,6 @@ void pcd_run_cpus_lockstep(int m68k_cycles) void pcd_prepare_frame(void) { - pcd_set_cycle_mult(); - // need this because we can't have direct mapping between // master<->slave cycle counters because of overflows mcd_m68k_cycle_base = Pico.t.m68c_aim; @@ -397,7 +392,6 @@ void pcd_state_loaded(void) unsigned int cycles; int diff; - pcd_set_cycle_mult(); pcd_state_loaded_mem(); memset(Pico_mcd->pcm_mixbuf, 0, sizeof(Pico_mcd->pcm_mixbuf)); @@ -407,8 +401,7 @@ void pcd_state_loaded(void) // old savestates.. cycles = pcd_cycles_m68k_to_s68k(Pico.t.m68c_aim); - diff = cycles - SekCycleAimS68k; - if (diff < -1000 || diff > 1000) { + if (CYCLES_GE(cycles - SekCycleAimS68k, 1000)) { SekCycleCntS68k = SekCycleAimS68k = cycles; } if (pcd_event_times[PCD_EVENT_CDC] == 0) { diff --git a/pico/memory.c b/pico/memory.c index 2f373cd6..2016f48d 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -1041,11 +1041,11 @@ static int get_scanline(int is_from_z80) if (is_from_z80) { // ugh... compute by dividing cycles since frame start by cycles per line // need some fractional resolution here, else there may be an extra line - int cycles_line = cycles_68k_to_z80(488 << 8)+1; // cycles per line, as Q8 + int cycles_line = cycles_68k_to_z80((unsigned)(488.5*256))+1; // cycles per line, Q8 int cycles_z80 = (z80_cyclesLeft<0 ? Pico.t.z80c_aim:z80_cyclesDone())<<8; int cycles = cycles_line * Pico.t.z80_scanline; // approximation by multiplying with inverse - if (cycles_z80 - cycles >= 2*cycles_line) { + if (cycles_z80 - cycles >= 4*cycles_line) { // compute 1/cycles_line, storing the result to avoid future dividing static int cycles_line_o, cycles_line_i; if (cycles_line_o != cycles_line) @@ -1150,7 +1150,6 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) switch (addr) { - // NB, OD2 A/V sync HACK: lower timer step by 1/4 z80 cycle (=64 in Q8) case 0x24: // timer A High 8 case 0x25: { // timer A Low 2 int TAnew = (addr == 0x24) ? ((ym2612.OPN.ST.TA & 0x03)|(((int)d)<<2)) @@ -1163,7 +1162,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) ym2612.OPN.ST.TA = TAnew; //ym2612.OPN.ST.TAC = (1024-TAnew)*18; //ym2612.OPN.ST.TAT = 0; - Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew) - 64; + Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew); elprintf(EL_YMTIMER, "timer a set to %i, %i", 1024 - TAnew, Pico.t.timer_a_next_oflow>>8); } return 0; @@ -1176,7 +1175,7 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) ym2612.OPN.ST.TB = d; //ym2612.OPN.ST.TBC = (256-d) * 288; //ym2612.OPN.ST.TBT = 0; - Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d) - 64; + Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d); elprintf(EL_YMTIMER, "timer b set to %i, %i", 256 - d, Pico.t.timer_b_next_oflow>>8); } return 0; @@ -1350,7 +1349,7 @@ static void access_68k_bus(int delay) // bus delay as Q8 // until an additional cycle is full. That is then added to the integer part. Pico.t.z80_busdelay = (delay&0xff) + (Pico.t.z80_busdelay&0xff); // accumulate z80_subCLeft((delay>>8) + (Pico.t.z80_busdelay>>8)); - // don't use SekCyclesBurn(7) here since the Z80 doesn't run in cycle lock to + // don't use SekCyclesBurn() here since the Z80 doesn't run in cycle lock to // the 68K. Count the stolen cycles to be accounted later in the 68k CPU runs Pico.t.z80_buscycles += 7; } @@ -1358,8 +1357,8 @@ static void access_68k_bus(int delay) // bus delay as Q8 static unsigned char z80_md_vdp_read(unsigned short a) { if ((a & 0xff00) == 0x7f00) { - // 68k bus access delay=3.3 per kabuto, for notaz picotest 2.4<=delay<2.55? - access_68k_bus(0x280); // Q8, picotest: 0x266(>=2.4) - 0x28b(<2.55) + // 68k bus access delay=3.3 per kabuto, for notaz picotest 2.422.42) - 0x292(<2.57) switch (a & 0x0d) { @@ -1383,8 +1382,8 @@ static unsigned char z80_md_bank_read(unsigned short a) unsigned int addr68k; unsigned char ret; - // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.03.0)-0x34c(<3.3) + // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.023.02)-0x351(<3.32) addr68k = Pico.m.z80_bank68k << 15; addr68k |= a & 0x7fff; @@ -1425,8 +1424,8 @@ static void z80_md_bank_write(unsigned int a, unsigned char data) { unsigned int addr68k; - // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.03.0)-0x34c(<3.3) + // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.023.02)-0x351(<3.32) addr68k = Pico.m.z80_bank68k << 15; addr68k += a & 0x7fff; diff --git a/pico/pico.c b/pico/pico.c index 7976c9e4..6b2124b3 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -228,6 +228,8 @@ void PicoLoopPrepare(void) Pico.m.dirtyPal = 1; rendstatus_old = -1; + if (PicoIn.AHW & PAHW_MCD) + PicoMCDPrepare(); if (PicoIn.AHW & PAHW_32X) Pico32xPrepare(); } diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 2f1747fe..40ce766e 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -66,15 +66,22 @@ static int SekSyncM68k(int once) return Pico.t.m68c_aim > Pico.t.m68c_cnt; } -static __inline void SekRunM68k(int cyc) +static __inline void SekAimM68k(int cyc, int mult) { - // refresh slowdown handling, 2 cycles every 128 - make this 1 every 64 + // refresh slowdown, for cart: 2 cycles every 128 - make this 1 every 64, + // for RAM: seems to be 0-3 every 128. Carts usually run from the cart + // area, but MCD games only use RAM, hence a different multiplier is needed. // NB must be quite accurate, so handle fractions as well (c/f OutRunners) - static int refresh; - Pico.t.m68c_cnt += (cyc + refresh) >> 6; - refresh = (cyc + refresh) & 0x3f; + int delay = (Pico.t.refresh_delay += cyc*mult) >> 14; + Pico.t.m68c_cnt += delay; + Pico.t.refresh_delay -= delay << 14; Pico.t.m68c_aim += cyc; +} +static __inline void SekRunM68k(int cyc) +{ + // TODO 0x100 would by 2 cycles/128, moreover far too sensitive + SekAimM68k(cyc, 0x10c); // OutRunners, testpico, VDPFIFOTesting SekSyncM68k(0); } @@ -108,10 +115,9 @@ static void do_timing_hacks_end(struct PicoVideo *pv) PicoVideoFIFOSync(CYCLES_M68K_LINE); // need rather tight Z80 sync for emulation of main bus cycle stealing - if (Pico.m.scanline&1) { + if (Pico.m.scanline&1) if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) PicoSyncZ80(Pico.t.m68c_aim); - } } static void do_timing_hacks_start(struct PicoVideo *pv) @@ -122,6 +128,8 @@ static void do_timing_hacks_start(struct PicoVideo *pv) // XXX how to handle Z80 bus cycle stealing during DMA correctly? if ((Pico.t.z80_buscycles -= cycles) < 0) Pico.t.z80_buscycles = 0; + if (Pico.m.scanline&1) + Pico.t.m68c_aim += 1; // add cycle each other line for 488.5 cycles/line } static int PicoFrameHints(void) @@ -167,7 +175,7 @@ static int PicoFrameHints(void) } // decide if we draw this line - if (!skip && (PicoIn.opt & POPT_ALT_RENDERER)) + if ((PicoIn.opt & POPT_ALT_RENDERER) && !skip) { // find the right moment for frame renderer, when display is no longer blanked if ((pv->reg[1]&0x40) || y > 100) { diff --git a/pico/pico_int.h b/pico/pico_int.h index c04a5ccf..d3229149 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -204,7 +204,7 @@ extern struct DrZ80 drZ80; #define z80_cyclesDone() \ (Pico.t.z80c_aim - z80_cyclesLeft) -// 68k clock = OSC/7, z80 clock = OSC/15, 68k:z80 ratio = 7/15*8192=3822.9 +// 68k clock = OSC/7, z80 clock = OSC/15, 68k:z80 ratio = 7/15 = 3822.9/8192 #define cycles_68k_to_z80(x) ((x) * 3823 >> 13) // ----------------------- SH2 CPU ----------------------- @@ -443,6 +443,7 @@ struct PicoTiming unsigned int m68c_aim; unsigned int m68c_frame_start; // m68k cycles unsigned int m68c_line_start; + int refresh_delay; unsigned int z80c_cnt; // z80 cycles done (this frame) unsigned int z80c_aim; @@ -523,7 +524,7 @@ struct mcd_misc unsigned int stopwatch_base_c; unsigned short m68k_poll_a; unsigned short m68k_poll_cnt; - unsigned short s68k_poll_a; + unsigned short s68k_poll_a; // 10 unsigned short s68k_poll_cnt; unsigned int s68k_poll_clk; unsigned char bcram_reg; // 18: battery-backed RAM cart register @@ -640,7 +641,8 @@ struct Pico32x unsigned char pad1; unsigned short pwm_p[2]; // pwm pos in fifo unsigned int pwm_cycle_p; // pwm play cursor (32x cycles) - unsigned int reserved[6]; + unsigned int hint_counter; + unsigned int reserved[5]; }; struct Pico32xMem @@ -803,6 +805,7 @@ PICO_INTERNAL void PicoExitMCD(void); PICO_INTERNAL void PicoPowerMCD(void); PICO_INTERNAL int PicoResetMCD(void); PICO_INTERNAL void PicoFrameMCD(void); +PICO_INTERNAL void PicoMCDPrepare(void); enum pcd_event { PCD_EVENT_CDC, diff --git a/pico/sound/sound.c b/pico/sound/sound.c index d80ba4e3..f4686c2d 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -177,7 +177,7 @@ void PsndRerate(int preserve_state) // samples per line (Q16) Pico.snd.smpl_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); // samples per z80 clock (Q20) - Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488; + Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488.5; // samples per 44.1 KHz sample Pico.snd.cdda_mult = 65536LL * 44100 / PicoIn.sndRate; Pico.snd.cdda_div = 65536LL * PicoIn.sndRate / 44100; diff --git a/pico/videoport.c b/pico/videoport.c index 6fb47a7f..bd744a3b 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -21,25 +21,27 @@ enum { clkdiv = 2 }; // CPU clock granularity: one of 1,2,4,8 // Thank you very much for the great work, Nemesis, Kabuto! // Slot clock is sysclock/20 for h32 and sysclock/16 for h40. -// One scanline is 63.7us/63.5us (h32/h40) long which is 488.6/487.4 68k cycles. -// Assume 488 for everything. +// One scanline is 63.7us/64.3us (ntsc/pal) long which is ~488.57 68k cycles. +// Approximate by 488 for VDP. // 1 slot is 488/171 = 2.8538 68k cycles in h32, and 488/210 = 2.3238 in h40. enum { slcpu = 488 }; // VDP has a slot counter running from 0x00 to 0xff every scanline, but it has // a gap depending on the video mode. The slot in which a horizontal interrupt // is generated also depends on the video mode. +// NB Kabuto says gapend40 is 0xe4. That's technically correct, since slots 0xb6 +// and 0xe4 are only half slots. Ignore 0xe4 here and make 0xb6 a full slot. enum { hint32 = 0x85, gapstart32 = 0x94, gapend32 = 0xe9}; enum { hint40 = 0xa5, gapstart40 = 0xb7, gapend40 = 0xe5}; -// XXX Kabuto says gapend40 is 0xe4, but then a line would've 211 slots, while -// it's 210 in all other sources I looked at? // The horizontal sync period (HBLANK) is 30/37 slots (h32/h40): // h32: 4 slots front porch (1.49us), 13 HSYNC (4.84us), 13 back porch (4.84us) // h40: 5 slots front porch (1.49us), 16 HSYNC (4.77us), 16 back porch (4.77us) -// HBLANK starts in slot 0x93/0xb3 and ends after slot 0x05 (from Kabuto's doc) +// HBLANK starts at slot 0x93/0xb4 and ends in the middle of slot 0x05/0x06, +// NB VDP slows down the h40 clock to h32 during HSYNC for 17 slots to get the +// right sync timing. Ignored in the slot calculation, but hblen40 is correct. enum { hboff32 = 0x93-hint32, hblen32 = 0xf8-(gapend32-gapstart32)-hint32};//30 -enum { hboff40 = 0xb3-hint40, hblen40 = 0xf8-(gapend40-gapstart40)-hint40};//37 +enum { hboff40 = 0xb4-hint40, hblen40 = 0xf8-(gapend40-gapstart40)-hint40};//37 // number of slots in a scanline #define slots32 (0x100-(gapend32-gapstart32)) // 171 @@ -263,7 +265,7 @@ void PicoVideoFIFOSync(int cycles) // calculate #slots since last executed slot slots = Cyc2Sl(vf, cycles) - vf->fifo_slot; - if (!slots || !vf->fifo_ql) return; + if (slots <= 0 || !vf->fifo_ql) return; // advance FIFO queue by #done slots done = slots; @@ -308,7 +310,7 @@ static int PicoVideoFIFODrain(int level, int cycles, int bgdma) } } if (vf->fifo_ql && ((vf->fifo_total > level) | bd)) - cycles = 488; // not completed in this scanline + cycles = slcpu; // not completed in this scanline if (cycles > ocyc) burn = cycles - ocyc; @@ -430,7 +432,7 @@ void PicoVideoFIFOMode(int active, int h40) vf->fifo_hcounts = vdphcounts[h40]; // recalculate FIFO slot for new mode vf->fifo_slot = Cyc2Sl(vf, lc); - vf->fifo_maxslot = Cyc2Sl(vf, 488); + vf->fifo_maxslot = Cyc2Sl(vf, slcpu); } // VDP memory rd/wr @@ -1031,10 +1033,9 @@ update_irq: static u32 VideoSr(const struct PicoVideo *pv) { - unsigned int hp = pv->reg[12]&1 ? hboff40*488/slots40 : hboff32*488/slots32; - unsigned int hl = pv->reg[12]&1 ? hblen40*488/slots40 : hblen32*488/slots32; - // XXX -2 is to please notaz' testpico, but why is this? - unsigned int c = SekCyclesDone()-2 - Pico.t.m68c_line_start; + unsigned int hp = pv->reg[12]&1 ? hboff40*488.5/slots40 : hboff32*488.5/slots32; + unsigned int hl = pv->reg[12]&1 ? hblen40*488.5/slots40 : hblen32*488.5/slots32; + unsigned int c = SekCyclesDone() - Pico.t.m68c_line_start; u32 d; PicoVideoFIFOSync(c);