void Pico32xInit(void)
{
- if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0)
- Pico32xSetClocks(PICO_MSH2_HZ, 0);
- if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0)
- Pico32xSetClocks(0, PICO_MSH2_HZ);
}
void PicoPower32x(void)
Pico32x.vdp_regs[0x0a/2] &= ~P32XV_VBLK; // get out of vblank
if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0) // no forced blanking
Pico32x.vdp_regs[0x0a/2] &= ~P32XV_PEN; // no palette access
- if (!(Pico32x.sh2_regs[0] & 0x80))
+ if (!(Pico32x.sh2_regs[0] & 0x80)) {
+ // NB must precede VInt per hw manual, min 4 SH-2 cycles to pass Mars Check
+ Pico32x.hint_counter = -0x18;
p32x_schedule_hint(NULL, Pico.t.m68c_aim);
+ }
p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_STATE_VPOLL, Pico.t.m68c_aim);
p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_STATE_VPOLL, Pico.t.m68c_aim);
if (!(Pico32x.sh2_regs[0] & 0x80) && (Pico.video.status & PVS_VB2))
return;
- after = (Pico32x.sh2_regs[4 / 2] + 1) * 488;
+ Pico32x.hint_counter += (Pico32x.sh2_regs[4 / 2] + 1) * (int)(488.5*0x10);
+ after = Pico32x.hint_counter >> 4;
+ Pico32x.hint_counter &= 0xf;
if (sh2 != NULL)
p32x_event_schedule_sh2(sh2, P32X_EVENT_HINT, after);
else
return;
}
- if (sh2s[0].m68krcycles_done == 0 && sh2s[1].m68krcycles_done == 0)
+ if (CYCLES_GE(sh2s[0].m68krcycles_done - Pico.t.m68c_aim, 500) ||
+ CYCLES_GE(sh2s[1].m68krcycles_done - Pico.t.m68c_aim, 500))
sh2s[0].m68krcycles_done = sh2s[1].m68krcycles_done = SekCyclesDone();
p32x_update_irls(NULL, SekCyclesDone());
p32x_timers_recalc();
void Pico32xPrepare(void)
{
+ if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0) // either msh2 cycle multiplier is still 0
+ Pico32xSetClocks(PICO_MSH2_HZ, 0); // NOTE(review): 0 for the 2nd clock presumably leaves ssh2 as is - confirm
+ if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0) // same check for ssh2
+ Pico32xSetClocks(0, PICO_MSH2_HZ); // ssh2 gets the same PICO_MSH2_HZ value
+
sh2_execute_prepare(&msh2, PicoIn.opt & POPT_EN_DRC);
sh2_execute_prepare(&ssh2, PicoIn.opt & POPT_EN_DRC);
}
pprof_end(s68k);
}
-static void pcd_set_cycle_mult(void)
+void PicoMCDPrepare(void)
{
- unsigned int div;
-
- if (Pico.m.pal)
- div = 50*313*488;
- else
- div = 60*262*488;
-
- // ~1.63 for NTSC, ~1.645 for PAL; round to nearest, x/y+0.5 -> (x+y/2)/y
- mcd_m68k_cycle_mult = ((12500000ull << 16) + div/2) / div;
- mcd_s68k_cycle_mult = ((1ull*div << 16) + 6250000) / 12500000;
+ // Recompute the two cycle multipliers (scaled by 1<<16) from the PAL/NTSC
+ // oscillator: 12500000 / (osc/7) and its inverse, each rounded to nearest.
+ // ~1.63 for NTSC, ~1.645 for PAL
+ // NB the whole expansion is parenthesized so the macro is safe in any expression
+#define DIV_ROUND(x,y) (((x)+(y)/2) / (y)) // round to nearest, x/y+0.5 -> (x+y/2)/y
+ unsigned int osc = (Pico.m.pal ? OSC_PAL : OSC_NTSC);
+ mcd_m68k_cycle_mult = DIV_ROUND(12500000ull << 16, osc / 7);
+ mcd_s68k_cycle_mult = DIV_ROUND(1ull * osc << 16, 7 * 12500000);
}
unsigned int pcd_cycles_m68k_to_s68k(unsigned int c)
#define pcd_run_cpus_normal pcd_run_cpus
//#define pcd_run_cpus_lockstep pcd_run_cpus
+static void SekAimM68k(int cyc, int mult);
static int SekSyncM68k(int once);
void pcd_run_cpus_normal(int m68k_cycles)
{
- Pico.t.m68c_aim += m68k_cycles;
+ // TODO this is suspicious. ~1 cycle refresh delay every 256 cycles?
+ SekAimM68k(m68k_cycles, 0x43); // Fhey area
while (CYCLES_GT(Pico.t.m68c_aim, Pico.t.m68c_cnt)) {
if (SekShouldInterrupt()) {
void pcd_prepare_frame(void)
{
- pcd_set_cycle_mult();
-
// need this because we can't have direct mapping between
// master<->slave cycle counters because of overflows
mcd_m68k_cycle_base = Pico.t.m68c_aim;
unsigned int cycles;
int diff;
- pcd_set_cycle_mult();
pcd_state_loaded_mem();
memset(Pico_mcd->pcm_mixbuf, 0, sizeof(Pico_mcd->pcm_mixbuf));
// old savestates..
cycles = pcd_cycles_m68k_to_s68k(Pico.t.m68c_aim);
- diff = cycles - SekCycleAimS68k;
- if (diff < -1000 || diff > 1000) {
+ if (CYCLES_GE(cycles - SekCycleAimS68k, 1000)) {
SekCycleCntS68k = SekCycleAimS68k = cycles;
}
if (pcd_event_times[PCD_EVENT_CDC] == 0) {
if (is_from_z80) {\r
// ugh... compute by dividing cycles since frame start by cycles per line\r
// need some fractional resolution here, else there may be an extra line\r
- int cycles_line = cycles_68k_to_z80(488 << 8)+1; // cycles per line, as Q8\r
+ int cycles_line = cycles_68k_to_z80((unsigned)(488.5*256))+1; // cycles per line, Q8\r
int cycles_z80 = (z80_cyclesLeft<0 ? Pico.t.z80c_aim:z80_cyclesDone())<<8;\r
int cycles = cycles_line * Pico.t.z80_scanline;\r
// approximation by multiplying with inverse\r
- if (cycles_z80 - cycles >= 2*cycles_line) {\r
+ if (cycles_z80 - cycles >= 4*cycles_line) {\r
// compute 1/cycles_line, storing the result to avoid future dividing\r
static int cycles_line_o, cycles_line_i;\r
if (cycles_line_o != cycles_line)\r
\r
switch (addr)\r
{\r
- // NB, OD2 A/V sync HACK: lower timer step by 1/4 z80 cycle (=64 in Q8)\r
case 0x24: // timer A High 8\r
case 0x25: { // timer A Low 2\r
int TAnew = (addr == 0x24) ? ((ym2612.OPN.ST.TA & 0x03)|(((int)d)<<2))\r
ym2612.OPN.ST.TA = TAnew;\r
//ym2612.OPN.ST.TAC = (1024-TAnew)*18;\r
//ym2612.OPN.ST.TAT = 0;\r
- Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew) - 64;\r
+ Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew);\r
elprintf(EL_YMTIMER, "timer a set to %i, %i", 1024 - TAnew, Pico.t.timer_a_next_oflow>>8);\r
}\r
return 0;\r
ym2612.OPN.ST.TB = d;\r
//ym2612.OPN.ST.TBC = (256-d) * 288;\r
//ym2612.OPN.ST.TBT = 0;\r
- Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d) - 64;\r
+ Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d);\r
elprintf(EL_YMTIMER, "timer b set to %i, %i", 256 - d, Pico.t.timer_b_next_oflow>>8);\r
}\r
return 0;\r
// until an additional cycle is full. That is then added to the integer part.\r
Pico.t.z80_busdelay = (delay&0xff) + (Pico.t.z80_busdelay&0xff); // accumulate\r
z80_subCLeft((delay>>8) + (Pico.t.z80_busdelay>>8));\r
- // don't use SekCyclesBurn(7) here since the Z80 doesn't run in cycle lock to\r
+ // don't use SekCyclesBurn() here since the Z80 doesn't run in cycle lock to\r
// the 68K. Count the stolen cycles to be accounted later in the 68k CPU runs\r
Pico.t.z80_buscycles += 7;\r
}\r
static unsigned char z80_md_vdp_read(unsigned short a)\r
{\r
if ((a & 0xff00) == 0x7f00) {\r
- // 68k bus access delay=3.3 per kabuto, for notaz picotest 2.4<=delay<2.55?\r
- access_68k_bus(0x280); // Q8, picotest: 0x266(>=2.4) - 0x28b(<2.55)\r
+ // 68k bus access delay=3.3 per kabuto, for notaz picotest 2.42<delay<2.57?\r
+ access_68k_bus(0x280); // Q8, picotest: 0x26d(>2.42) - 0x292(<2.57)\r
\r
switch (a & 0x0d)\r
{\r
unsigned int addr68k;\r
unsigned char ret;\r
\r
- // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.0<delay<3.3\r
- access_68k_bus(0x340); // // Q8, picotest: 0x301(>3.0)-0x34c(<3.3)\r
+ // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.02<delay<3.32\r
+ access_68k_bus(0x340); // Q8, picotest: 0x306(>3.02)-0x351(<3.32)\r
\r
addr68k = Pico.m.z80_bank68k << 15;\r
addr68k |= a & 0x7fff;\r
{\r
unsigned int addr68k;\r
\r
- // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.0<delay<3.3\r
- access_68k_bus(0x340); // // Q8, picotest: 0x301(>3.0)-0x34c(<3.3)\r
+ // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.02<delay<3.32\r
+ access_68k_bus(0x340); // Q8, picotest: 0x306(>3.02)-0x351(<3.32)\r
\r
addr68k = Pico.m.z80_bank68k << 15;\r
addr68k += a & 0x7fff;\r
Pico.m.dirtyPal = 1;\r
rendstatus_old = -1;\r
\r
+ if (PicoIn.AHW & PAHW_MCD)\r
+ PicoMCDPrepare();\r
if (PicoIn.AHW & PAHW_32X)\r
Pico32xPrepare();\r
}\r
return Pico.t.m68c_aim > Pico.t.m68c_cnt;
}
-static __inline void SekRunM68k(int cyc)
+static __inline void SekAimM68k(int cyc, int mult)
{
- // refresh slowdown handling, 2 cycles every 128 - make this 1 every 64
+ // refresh slowdown, for cart: 2 cycles every 128 - make this 1 every 64,
+ // for RAM: seems to be 0-3 every 128. Carts usually run from the cart
+ // area, but MCD games only use RAM, hence a different multiplier is needed.
// NB must be quite accurate, so handle fractions as well (c/f OutRunners)
- static int refresh;
- Pico.t.m68c_cnt += (cyc + refresh) >> 6;
- refresh = (cyc + refresh) & 0x3f;
+ int delay = (Pico.t.refresh_delay += cyc*mult) >> 14; // accumulate cyc*mult; bits 14 and up are whole delay cycles
+ Pico.t.m68c_cnt += delay; // add the whole delay cycles to the cycle counter
+ Pico.t.refresh_delay -= delay << 14; // keep only the fractional remainder for the next call
Pico.t.m68c_aim += cyc;
+}
+static __inline void SekRunM68k(int cyc)
+{
+ // TODO 0x100 would be 2 cycles/128, moreover far too sensitive
+ SekAimM68k(cyc, 0x10c); // OutRunners, testpico, VDPFIFOTesting
SekSyncM68k(0);
}
PicoVideoFIFOSync(CYCLES_M68K_LINE);
// need rather tight Z80 sync for emulation of main bus cycle stealing
- if (Pico.m.scanline&1) {
+ if (Pico.m.scanline&1)
if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80))
PicoSyncZ80(Pico.t.m68c_aim);
- }
}
static void do_timing_hacks_start(struct PicoVideo *pv)
// XXX how to handle Z80 bus cycle stealing during DMA correctly?
if ((Pico.t.z80_buscycles -= cycles) < 0)
Pico.t.z80_buscycles = 0;
+ if (Pico.m.scanline&1)
+ Pico.t.m68c_aim += 1; // add cycle each other line for 488.5 cycles/line
}
static int PicoFrameHints(void)
}
// decide if we draw this line
- if (!skip && (PicoIn.opt & POPT_ALT_RENDERER))
+ if ((PicoIn.opt & POPT_ALT_RENDERER) && !skip)
{
// find the right moment for frame renderer, when display is no longer blanked
if ((pv->reg[1]&0x40) || y > 100) {
#define z80_cyclesDone() \\r
(Pico.t.z80c_aim - z80_cyclesLeft)\r
\r
-// 68k clock = OSC/7, z80 clock = OSC/15, 68k:z80 ratio = 7/15*8192=3822.9\r
+// 68k clock = OSC/7, z80 clock = OSC/15, 68k:z80 ratio = 7/15 = 3822.9/8192\r
#define cycles_68k_to_z80(x) ((x) * 3823 >> 13)\r
\r
// ----------------------- SH2 CPU -----------------------\r
unsigned int m68c_aim;\r
unsigned int m68c_frame_start; // m68k cycles\r
unsigned int m68c_line_start;\r
+ int refresh_delay;\r
\r
unsigned int z80c_cnt; // z80 cycles done (this frame)\r
unsigned int z80c_aim;\r
unsigned int stopwatch_base_c;\r
unsigned short m68k_poll_a;\r
unsigned short m68k_poll_cnt;\r
- unsigned short s68k_poll_a;\r
+ unsigned short s68k_poll_a; // 10\r
unsigned short s68k_poll_cnt;\r
unsigned int s68k_poll_clk;\r
unsigned char bcram_reg; // 18: battery-backed RAM cart register\r
unsigned char pad1;\r
unsigned short pwm_p[2]; // pwm pos in fifo\r
unsigned int pwm_cycle_p; // pwm play cursor (32x cycles)\r
- unsigned int reserved[6];\r
+ unsigned int hint_counter;\r
+ unsigned int reserved[5];\r
};\r
\r
struct Pico32xMem\r
PICO_INTERNAL void PicoPowerMCD(void);\r
PICO_INTERNAL int PicoResetMCD(void);\r
PICO_INTERNAL void PicoFrameMCD(void);\r
+PICO_INTERNAL void PicoMCDPrepare(void);\r
\r
enum pcd_event {\r
PCD_EVENT_CDC,\r
// samples per line (Q16)\r
Pico.snd.smpl_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines);\r
// samples per z80 clock (Q20)\r
- Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488;\r
+ Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488.5;\r
// samples per 44.1 KHz sample\r
Pico.snd.cdda_mult = 65536LL * 44100 / PicoIn.sndRate;\r
Pico.snd.cdda_div = 65536LL * PicoIn.sndRate / 44100;\r
// Thank you very much for the great work, Nemesis, Kabuto!\r
\r
// Slot clock is sysclock/20 for h32 and sysclock/16 for h40.\r
-// One scanline is 63.7us/63.5us (h32/h40) long which is 488.6/487.4 68k cycles.\r
-// Assume 488 for everything.\r
+// One scanline is 63.7us/64.3us (ntsc/pal) long which is ~488.57 68k cycles.\r
+// Approximate by 488 for VDP.\r
// 1 slot is 488/171 = 2.8538 68k cycles in h32, and 488/210 = 2.3238 in h40.\r
enum { slcpu = 488 };\r
\r
// VDP has a slot counter running from 0x00 to 0xff every scanline, but it has\r
// a gap depending on the video mode. The slot in which a horizontal interrupt\r
// is generated also depends on the video mode.\r
+// NB Kabuto says gapend40 is 0xe4. That's technically correct, since slots 0xb6\r
+// and 0xe4 are only half slots. Ignore 0xe4 here and make 0xb6 a full slot.\r
enum { hint32 = 0x85, gapstart32 = 0x94, gapend32 = 0xe9};\r
enum { hint40 = 0xa5, gapstart40 = 0xb7, gapend40 = 0xe5};\r
-// XXX Kabuto says gapend40 is 0xe4, but then a line would've 211 slots, while\r
-// it's 210 in all other sources I looked at?\r
\r
// The horizontal sync period (HBLANK) is 30/37 slots (h32/h40):\r
// h32: 4 slots front porch (1.49us), 13 HSYNC (4.84us), 13 back porch (4.84us)\r
// h40: 5 slots front porch (1.49us), 16 HSYNC (4.77us), 16 back porch (4.77us)\r
-// HBLANK starts in slot 0x93/0xb3 and ends after slot 0x05 (from Kabuto's doc)\r
+// HBLANK starts at slot 0x93/0xb4 and ends in the middle of slot 0x05/0x06.\r
+// NB VDP slows down the h40 clock to h32 during HSYNC for 17 slots to get the\r
+// right sync timing. Ignored in the slot calculation, but hblen40 is correct.\r
enum { hboff32 = 0x93-hint32, hblen32 = 0xf8-(gapend32-gapstart32)-hint32};//30\r
-enum { hboff40 = 0xb3-hint40, hblen40 = 0xf8-(gapend40-gapstart40)-hint40};//37\r
+enum { hboff40 = 0xb4-hint40, hblen40 = 0xf8-(gapend40-gapstart40)-hint40};//37\r
\r
// number of slots in a scanline\r
#define slots32 (0x100-(gapend32-gapstart32)) // 171\r
\r
// calculate #slots since last executed slot\r
slots = Cyc2Sl(vf, cycles) - vf->fifo_slot;\r
- if (!slots || !vf->fifo_ql) return;\r
+ if (slots <= 0 || !vf->fifo_ql) return;\r
\r
// advance FIFO queue by #done slots\r
done = slots;\r
}\r
}\r
if (vf->fifo_ql && ((vf->fifo_total > level) | bd))\r
- cycles = 488; // not completed in this scanline\r
+ cycles = slcpu; // not completed in this scanline\r
if (cycles > ocyc)\r
burn = cycles - ocyc;\r
\r
vf->fifo_hcounts = vdphcounts[h40];\r
// recalculate FIFO slot for new mode\r
vf->fifo_slot = Cyc2Sl(vf, lc);\r
- vf->fifo_maxslot = Cyc2Sl(vf, 488);\r
+ vf->fifo_maxslot = Cyc2Sl(vf, slcpu);\r
}\r
\r
// VDP memory rd/wr\r
\r
static u32 VideoSr(const struct PicoVideo *pv)\r
{\r
- unsigned int hp = pv->reg[12]&1 ? hboff40*488/slots40 : hboff32*488/slots32;\r
- unsigned int hl = pv->reg[12]&1 ? hblen40*488/slots40 : hblen32*488/slots32;\r
- // XXX -2 is to please notaz' testpico, but why is this?\r
- unsigned int c = SekCyclesDone()-2 - Pico.t.m68c_line_start;\r
+ unsigned int hp = pv->reg[12]&1 ? hboff40*488.5/slots40 : hboff32*488.5/slots32;\r
+ unsigned int hl = pv->reg[12]&1 ? hblen40*488.5/slots40 : hblen32*488.5/slots32;\r
+ unsigned int c = SekCyclesDone() - Pico.t.m68c_line_start;\r
u32 d;\r
\r
PicoVideoFIFOSync(c);\r