From ed4402a7dfd12dbbf34c547b438a671ae8114197 Mon Sep 17 00:00:00 2001
From: notaz <notasas@gmail.com>
Date: Sun, 7 Jul 2013 01:05:11 +0300
Subject: [PATCH] 32x: start reworking scheduling

---
 cpu/sh2/compiler.c            |   7 +-
 cpu/sh2/mame/sh2pico.c        |  11 +--
 cpu/sh2/sh2.c                 |   8 +-
 cpu/sh2/sh2.h                 |  16 +++-
 pico/32x/32x.c                | 154 +++++++++++++++++++---------------
 pico/32x/memory.c             |   2 +-
 pico/cd/pico.c                |  26 +-----
 pico/pico.c                   |  26 ------
 pico/pico.h                   |  22 +++--
 pico/pico_cmn.c               |  23 +++++
 pico/pico_int.h               |   8 +-
 pico/state.c                  |   4 +-
 platform/common/config_file.c |  11 ++-
 platform/common/emu.c         |   4 +-
 platform/common/emu.h         |   2 +
 platform/common/menu_pico.c   |  24 +++---
 platform/libretro.c           |   3 -
 17 files changed, 184 insertions(+), 167 deletions(-)

diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c
index 392af29c..5b52694a 100644
--- a/cpu/sh2/compiler.c
+++ b/cpu/sh2/compiler.c
@@ -2978,13 +2978,12 @@ void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid)
     1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff);
 }
 
-void sh2_execute(SH2 *sh2c, int cycles)
+int sh2_execute(SH2 *sh2c, int cycles)
 {
   int ret_cycles;
   sh2 = sh2c; // XXX
 
-  sh2c->cycles_aim += cycles;
-  cycles = sh2c->cycles_aim - sh2c->cycles_done;
+  sh2c->cycles_timeslice = cycles;
 
   // cycles are kept in SHR_SR unused bits (upper 20)
   // bit19 contains T saved for delay slot
@@ -2998,7 +2997,7 @@ void sh2_execute(SH2 *sh2c, int cycles)
   if (ret_cycles > 0)
     dbg(1, "warning: drc returned with cycles: %d", ret_cycles);
 
-  sh2c->cycles_done += cycles - ret_cycles;
+  return sh2c->cycles_timeslice - ret_cycles;
 }
 
 #if (DRC_DEBUG & 2)
diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c
index 233e0a1f..d7883160 100644
--- a/cpu/sh2/mame/sh2pico.c
+++ b/cpu/sh2/mame/sh2pico.c
@@ -61,14 +61,15 @@ static unsigned int op_refs[0x10000];
 
 #ifndef DRC_SH2
 
-void sh2_execute(SH2 *sh2_, int cycles)
+int sh2_execute(SH2 *sh2_, int cycles)
 {
 	sh2 = sh2_;
-	sh2->cycles_aim += cycles;
-	sh2->icount = cycles = sh2->cycles_aim - sh2->cycles_done;
+	sh2->icount = cycles;
 
 	if (sh2->icount <= 0)
-		return;
+		return cycles;
+
+	sh2->cycles_timeslice = cycles;
 
 	do
 	{
@@ -122,7 +123,7 @@ void sh2_execute(SH2 *sh2_, int cycles)
 	}
 	while (sh2->icount > 0 || sh2->delay);	/* can't interrupt before delay */
 
-	sh2->cycles_done += cycles - sh2->icount;
+	return sh2->cycles_timeslice - sh2->icount;
 }
 
 #else // DRC_SH2
diff --git a/cpu/sh2/sh2.c b/cpu/sh2/sh2.c
index dbd4c2d8..937f8e42 100644
--- a/cpu/sh2/sh2.c
+++ b/cpu/sh2/sh2.c
@@ -20,7 +20,7 @@ int sh2_init(SH2 *sh2, int is_slave)
 {
 	int ret = 0;
 
-	memset(sh2, 0, sizeof(*sh2));
+	memset(sh2, 0, offsetof(SH2, mult_m68k_to_sh2));
 	sh2->is_slave = is_slave;
 	pdb_register_cpu(sh2, PDBCT_SH2, is_slave ? "ssh2" : "msh2");
 #ifdef DRC_SH2
@@ -59,7 +59,7 @@ void sh2_do_irq(SH2 *sh2, int level, int vector)
 	sh2->pc = p32x_sh2_read32(sh2->vbr + vector * 4, sh2);
 
 	/* 13 cycles at best */
-	sh2->cycles_done += 13;
+	sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, 13);
 //	sh2->icount -= 13;
 }
 
@@ -105,8 +105,6 @@ void sh2_pack(const SH2 *sh2, unsigned char *buff)
 
 	p[0] = sh2->pending_int_irq;
 	p[1] = sh2->pending_int_vector;
-	p[2] = sh2->cycles_aim;
-	p[3] = sh2->cycles_done;
 }
 
 void sh2_unpack(SH2 *sh2, const unsigned char *buff)
@@ -118,7 +116,5 @@ void sh2_unpack(SH2 *sh2, const unsigned char *buff)
 
 	sh2->pending_int_irq = p[0];
 	sh2->pending_int_vector = p[1];
-	sh2->cycles_aim = p[2];
-	sh2->cycles_done = p[3];
 }
 
diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h
index e15259d5..a3d631d6 100644
--- a/cpu/sh2/sh2.h
+++ b/cpu/sh2/sh2.h
@@ -52,10 +52,20 @@ typedef struct SH2_
 	int	REGPARM(2) (*irq_callback)(struct SH2_ *sh2, int level);
 	int	is_slave;
 
-	unsigned int	cycles_aim;	// subtract sh2_icount to get global counter
-	unsigned int	cycles_done;
+	unsigned int	cycles_timeslice;
+
+	// we use 68k reference cycles for easier sync
+	unsigned int	m68krcycles_done;
+	unsigned int	mult_m68k_to_sh2;
+	unsigned int	mult_sh2_to_m68k;
 } SH2;
 
+#define CYCLE_MULT_SHIFT 10 // fraction bits of the fixed-point mult_* factors
+#define C_M68K_TO_SH2(xsh2, c) \
+	((int)((c) * (xsh2).mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT)
+#define C_SH2_TO_M68K(xsh2, c) \
+	((int)(((c) + 3) * (xsh2).mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT)
+
 extern SH2 *sh2; // active sh2. XXX: consider removing
 
 int  sh2_init(SH2 *sh2, int is_slave);
@@ -67,7 +77,7 @@ void sh2_do_irq(SH2 *sh2, int level, int vector);
 void sh2_pack(const SH2 *sh2, unsigned char *buff);
 void sh2_unpack(SH2 *sh2, const unsigned char *buff);
 
-void sh2_execute(SH2 *sh2, int cycles);
+int  sh2_execute(SH2 *sh2, int cycles);
 
 // regs, pending_int*, cycles, reserved
 #define SH2_STATE_SIZE ((24 + 2 + 2 + 12) * 4)
diff --git a/pico/32x/32x.c b/pico/32x/32x.c
index 4e036000..5581b094 100644
--- a/pico/32x/32x.c
+++ b/pico/32x/32x.c
@@ -120,10 +120,16 @@ void p32x_reset_sh2s(void)
     sh2_set_vbr(1, vbr);
     // program will set S_OK
   }
+
+  msh2.m68krcycles_done = ssh2.m68krcycles_done = SekCyclesDoneT();
 }
 
 void Pico32xInit(void)
 {
+  if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0) // master not set up yet
+    Pico32xSetClocks(PICO_MSH2_HZ, 0);
+  if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0) // slave not set up yet
+    Pico32xSetClocks(0, PICO_SSH2_HZ);
 }
 
 void PicoPower32x(void)
@@ -199,81 +205,76 @@ static void p32x_start_blank(void)
   p32x_poll_event(3, 1);
 }
 
-static __inline void run_m68k(int cyc)
-{
-  pprof_start(m68k);
-
-p32x_poll_event(3, 0);
-#if defined(EMU_C68K)
-  PicoCpuCM68k.cycles = cyc;
-  CycloneRun(&PicoCpuCM68k);
-  SekCycleCnt += cyc - PicoCpuCM68k.cycles;
-#elif defined(EMU_M68K)
-  SekCycleCnt += m68k_execute(cyc);
-#elif defined(EMU_F68K)
-  SekCycleCnt += fm68k_emulate(cyc+1, 0, 0);
-#endif
-
-  pprof_end(m68k);
-}
+#define sync_sh2s_normal p32x_sync_sh2s
+//#define sync_sh2s_lockstep p32x_sync_sh2s
 
-// ~1463.8, but due to cache misses and slow mem
-// it's much lower than that
-//#define SH2_LINE_CYCLES 735
-#define CYCLES_M68K2MSH2(x) (((x) * p32x_msh2_multiplier) >> 10)
-#define CYCLES_M68K2SSH2(x) (((x) * p32x_ssh2_multiplier) >> 10)
-
-#define PICO_32X
-#define CPUS_RUN_SIMPLE(m68k_cycles,s68k_cycles) \
-{ \
-  int slice; \
-  SekCycleAim += m68k_cycles; \
-  while (SekCycleCnt < SekCycleAim) { \
-    slice = SekCycleCnt; \
-    run_m68k(SekCycleAim - SekCycleCnt); \
-    if (!(Pico32x.regs[0] & P32XS_nRES)) \
-      continue; /* SH2s reseting */ \
-    slice = SekCycleCnt - slice; /* real count from 68k */ \
-    if (SekCycleCnt < SekCycleAim) \
-      elprintf(EL_32X, "slice %d", slice); \
-    if (!(Pico32x.emu_flags & (P32XF_SSH2POLL|P32XF_SSH2VPOLL))) { \
-      pprof_start(ssh2); \
-      sh2_execute(&ssh2, CYCLES_M68K2SSH2(slice)); \
-      pprof_end(ssh2); \
-    } \
-    if (!(Pico32x.emu_flags & (P32XF_MSH2POLL|P32XF_MSH2VPOLL))) { \
-      pprof_start(msh2); \
-      sh2_execute(&msh2, CYCLES_M68K2MSH2(slice)); \
-      pprof_end(msh2); \
-    } \
-    pprof_start(dummy); \
-    pprof_end(dummy); \
-  } \
+void sync_sh2s_normal(unsigned int m68k_target)
+{
+  unsigned int target = m68k_target;
+  int msh2_cycles, ssh2_cycles;
+  int done;
+
+  elprintf(EL_32X, "sh2 sync to %u (%u)", m68k_target, SekCycleCnt);
+
+  if (!(Pico32x.regs[0] & P32XS_nRES))
+    return; // rare
+
+  {
+    msh2_cycles = C_M68K_TO_SH2(msh2, target - msh2.m68krcycles_done);
+    ssh2_cycles = C_M68K_TO_SH2(ssh2, target - ssh2.m68krcycles_done);
+
+    while (msh2_cycles > 0 || ssh2_cycles > 0) {
+      elprintf(EL_32X, "sh2 exec %u,%u->%u",
+        msh2.m68krcycles_done, ssh2.m68krcycles_done, target);
+
+      if (Pico32x.emu_flags & (P32XF_SSH2POLL|P32XF_SSH2VPOLL)) {
+        ssh2.m68krcycles_done = target;
+        ssh2_cycles = 0;
+      }
+      else if (ssh2_cycles > 0) {
+        done = sh2_execute(&ssh2, ssh2_cycles);
+        ssh2.m68krcycles_done += C_SH2_TO_M68K(ssh2, done);
+
+        ssh2_cycles = C_M68K_TO_SH2(ssh2, target - ssh2.m68krcycles_done);
+      }
+
+      if (Pico32x.emu_flags & (P32XF_MSH2POLL|P32XF_MSH2VPOLL)) {
+        msh2.m68krcycles_done = target;
+        msh2_cycles = 0;
+      }
+      else if (msh2_cycles > 0) {
+        done = sh2_execute(&msh2, msh2_cycles);
+        msh2.m68krcycles_done += C_SH2_TO_M68K(msh2, done);
+
+        msh2_cycles = C_M68K_TO_SH2(msh2, target - msh2.m68krcycles_done);
+      }
+    }
+  }
 }
 
 #define STEP_68K 24
-#define CPUS_RUN_LOCKSTEP(m68k_cycles,s68k_cycles) \
-{ \
-  int slice; \
-  SekCycleAim += m68k_cycles; \
-  while (SekCycleCnt < SekCycleAim) { \
-    slice = SekCycleCnt; \
-    run_m68k(STEP_68K); \
-    if (!(Pico32x.regs[0] & P32XS_nRES)) \
-      continue; /* SH2s reseting */ \
-    slice = SekCycleCnt - slice; /* real count from 68k */ \
-    if (!(Pico32x.emu_flags & (P32XF_SSH2POLL|P32XF_SSH2VPOLL))) { \
-      sh2_execute(&ssh2, CYCLES_M68K2SSH2(slice)); \
-    } \
-    if (!(Pico32x.emu_flags & (P32XF_MSH2POLL|P32XF_MSH2VPOLL))) { \
-      sh2_execute(&msh2, CYCLES_M68K2MSH2(slice)); \
-    } \
-  } \
+
+void sync_sh2s_lockstep(unsigned int m68k_target)
+{
+  unsigned int mcycles;
+  
+  mcycles = msh2.m68krcycles_done;
+  if (ssh2.m68krcycles_done < mcycles)
+    mcycles = ssh2.m68krcycles_done;
+
+  while (mcycles < m68k_target) {
+    mcycles += STEP_68K;
+    sync_sh2s_normal(mcycles);
+  }
 }
 
-#define CPUS_RUN CPUS_RUN_SIMPLE
-//#define CPUS_RUN CPUS_RUN_LOCKSTEP
+#define CPUS_RUN(m68k_cycles,s68k_cycles) do { \
+  SekRunM68k(m68k_cycles); \
+  if (SekIsStoppedM68k()) \
+    p32x_sync_sh2s(SekCycleCntT + SekCycleCnt); \
+} while (0)
 
+#define PICO_32X
 #include "../pico_cmn.c"
 
 void PicoFrame32x(void)
@@ -291,3 +292,20 @@ void PicoFrame32x(void)
   elprintf(EL_32X, "poll: %02x", Pico32x.emu_flags);
 }
 
+// calculate multipliers against 68k clock (7670442)
+// normally * 3, but effectively slower due to high latencies everywhere
+// however using something lower breaks MK2 animations
+void Pico32xSetClocks(int msh2_hz, int ssh2_hz)
+{
+  float m68k_clk = (float)(OSC_NTSC / 7);
+  if (msh2_hz > 0) {
+    msh2.mult_m68k_to_sh2 = (int)((float)msh2_hz * (1 << CYCLE_MULT_SHIFT) / m68k_clk);
+    msh2.mult_sh2_to_m68k = (int)(m68k_clk * (1 << CYCLE_MULT_SHIFT) / (float)msh2_hz);
+  }
+  if (ssh2_hz > 0) {
+    ssh2.mult_m68k_to_sh2 = (int)((float)ssh2_hz * (1 << CYCLE_MULT_SHIFT) / m68k_clk);
+    ssh2.mult_sh2_to_m68k = (int)(m68k_clk * (1 << CYCLE_MULT_SHIFT) / (float)ssh2_hz);
+  }
+}
+
+// vim:shiftwidth=2:ts=2:expandtab
diff --git a/pico/32x/memory.c b/pico/32x/memory.c
index a95db24c..11f07b75 100644
--- a/pico/32x/memory.c
+++ b/pico/32x/memory.c
@@ -1558,4 +1558,4 @@ void Pico32xStateLoaded(void)
 #endif
 }
 
-// vim:shiftwidth=2:expandtab
+// vim:shiftwidth=2:ts=2:expandtab
diff --git a/pico/cd/pico.c b/pico/cd/pico.c
index 8bd716fe..7549d542 100644
--- a/pico/cd/pico.c
+++ b/pico/cd/pico.c
@@ -68,30 +68,6 @@ PICO_INTERNAL int PicoResetMCD(void)
   return 0;
 }
 
-static __inline void SekRunM68k(int cyc)
-{
-  int cyc_do;
-
-  pprof_start(m68k);
-
-  SekCycleAim+=cyc;
-  if ((cyc_do=SekCycleAim-SekCycleCnt) <= 0) return;
-#if defined(EMU_CORE_DEBUG)
-  SekCycleCnt+=CM_compareRun(cyc_do, 0);
-#elif defined(EMU_C68K)
-  PicoCpuCM68k.cycles=cyc_do;
-  CycloneRun(&PicoCpuCM68k);
-  SekCycleCnt+=cyc_do-PicoCpuCM68k.cycles;
-#elif defined(EMU_M68K)
-  m68k_set_context(&PicoCpuMM68k);
-  SekCycleCnt+=m68k_execute(cyc_do);
-#elif defined(EMU_F68K)
-  g_m68kcontext=&PicoCpuFM68k;
-  SekCycleCnt+=fm68k_emulate(cyc_do, 0, 0);
-#endif
-  pprof_end(m68k);
-}
-
 static __inline void SekRunS68k(int cyc)
 {
   int cyc_do;
@@ -106,9 +82,11 @@ static __inline void SekRunS68k(int cyc)
 #elif defined(EMU_M68K)
   m68k_set_context(&PicoCpuMS68k);
   SekCycleCntS68k+=m68k_execute(cyc_do);
+  m68k_set_context(&PicoCpuMM68k);
 #elif defined(EMU_F68K)
   g_m68kcontext=&PicoCpuFS68k;
   SekCycleCntS68k+=fm68k_emulate(cyc_do, 0, 0);
+  g_m68kcontext=&PicoCpuFM68k;
 #endif
 }
 
diff --git a/pico/pico.c b/pico/pico.c
index 6a065e67..b8e76b38 100644
--- a/pico/pico.c
+++ b/pico/pico.c
@@ -23,9 +23,6 @@ struct PicoSRAM SRam;
 int emustatus;         // rapid_ym2612, multi_ym_updates
 int scanlines_total;
 
-int p32x_msh2_multiplier = MSH2_MULTI_DEFAULT;
-int p32x_ssh2_multiplier = SSH2_MULTI_DEFAULT;
-
 void (*PicoWriteSound)(int len) = NULL; // called at the best time to send sound buffer (PsndOut) to hardware
 void (*PicoResetHook)(void) = NULL;
 void (*PicoLineHook)(void) = NULL;
@@ -273,29 +270,6 @@ PICO_INTERNAL int CheckDMA(void)
   return burn;
 }
 
-static __inline void SekRunM68k(int cyc)
-{
-  int cyc_do;
-  pprof_start(m68k);
-
-  SekCycleAim+=cyc;
-  if ((cyc_do=SekCycleAim-SekCycleCnt) <= 0) return;
-#if defined(EMU_CORE_DEBUG)
-  // this means we do run-compare
-  SekCycleCnt+=CM_compareRun(cyc_do, 0);
-#elif defined(EMU_C68K)
-  PicoCpuCM68k.cycles=cyc_do;
-  CycloneRun(&PicoCpuCM68k);
-  SekCycleCnt+=cyc_do-PicoCpuCM68k.cycles;
-#elif defined(EMU_M68K)
-  SekCycleCnt+=m68k_execute(cyc_do);
-#elif defined(EMU_F68K)
-  SekCycleCnt+=fm68k_emulate(cyc_do+1, 0, 0);
-#endif
-
-  pprof_end(m68k);
-}
-
 #include "pico_cmn.c"
 
 int z80stopCycle;
diff --git a/pico/pico.h b/pico/pico.h
index b5d34f34..667b6c25 100644
--- a/pico/pico.h
+++ b/pico/pico.h
@@ -212,18 +212,24 @@ extern unsigned short *PicoCramHigh; // pointer to CRAM buff (0x40 shorts), conv
 extern void (*PicoPrepareCram)();    // prepares PicoCramHigh for renderer to use
 
 // pico.c (32x)
-// multipliers against 68k clock (7670442)
-// normally * 3, but effectively slower due to high latencies everywhere
-// however using something lower breaks MK2 animations
-extern int p32x_msh2_multiplier;
-extern int p32x_ssh2_multiplier;
-#define SH2_MULTI_SHIFT 10
-#define MSH2_MULTI_DEFAULT ((1 << SH2_MULTI_SHIFT) * 3)
-#define SSH2_MULTI_DEFAULT ((1 << SH2_MULTI_SHIFT) * 3)
+#ifndef NO_32X
+
+void Pico32xSetClocks(int msh2_hz, int ssh2_hz);
 
 // 32x/draw.c
 void PicoDraw32xSetFrameMode(int is_on, int only_32x);
 
+#else
+
+#define Pico32xSetClocks(msh2_hz, ssh2_hz)
+
+#endif
+
+// normally 68k clock (7670442) * 3, but in reality much lower
+// because of high memory latencies
+#define PICO_MSH2_HZ ((int)(7670442.0 * 2.4))
+#define PICO_SSH2_HZ ((int)(7670442.0 * 2.4))
+
 // sound.c
 extern int PsndRate,PsndLen;
 extern short *PsndOut;
diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c
index 8499f93f..7178440d 100644
--- a/pico/pico_cmn.c
+++ b/pico/pico_cmn.c
@@ -26,6 +26,29 @@
   SekRunM68k(m68k_cycles)
 #endif
 
+static __inline void SekRunM68k(int cyc)
+{
+  int cyc_do;
+  pprof_start(m68k);
+
+  SekCycleAim+=cyc;
+  if ((cyc_do=SekCycleAim-SekCycleCnt) <= 0) return;
+#if defined(EMU_CORE_DEBUG)
+  // this means we do run-compare
+  SekCycleCnt+=CM_compareRun(cyc_do, 0);
+#elif defined(EMU_C68K)
+  PicoCpuCM68k.cycles=cyc_do;
+  CycloneRun(&PicoCpuCM68k);
+  SekCycleCnt+=cyc_do-PicoCpuCM68k.cycles;
+#elif defined(EMU_M68K)
+  SekCycleCnt+=m68k_execute(cyc_do);
+#elif defined(EMU_F68K)
+  SekCycleCnt+=fm68k_emulate(cyc_do+1, 0, 0);
+#endif
+
+  pprof_end(m68k);
+}
+
 static int PicoFrameHints(void)
 {
   struct PicoVideo *pv=&Pico.video;
diff --git a/pico/pico_int.h b/pico/pico_int.h
index c206aa8e..f5b4cd47 100644
--- a/pico/pico_int.h
+++ b/pico/pico_int.h
@@ -50,6 +50,7 @@ extern struct Cyclone PicoCpuCM68k, PicoCpuCS68k;
 #define SekSr     CycloneGetSr(&PicoCpuCM68k)
 #define SekSetStop(x) { PicoCpuCM68k.state_flags&=~1; if (x) { PicoCpuCM68k.state_flags|=1; PicoCpuCM68k.cycles=0; } }
 #define SekSetStopS68k(x) { PicoCpuCS68k.state_flags&=~1; if (x) { PicoCpuCS68k.state_flags|=1; PicoCpuCS68k.cycles=0; } }
+#define SekIsStoppedM68k() (PicoCpuCM68k.state_flags&1)
 #define SekIsStoppedS68k() (PicoCpuCS68k.state_flags&1)
 #define SekShouldInterrupt (PicoCpuCM68k.irq > (PicoCpuCM68k.srh&7))
 
@@ -83,6 +84,7 @@ extern M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k;
 	PicoCpuFS68k.execinfo &= ~FM68K_HALTED; \
 	if (x) { PicoCpuFS68k.execinfo |= FM68K_HALTED; PicoCpuFS68k.io_cycle_counter = 0; } \
 }
+#define SekIsStoppedM68k() (PicoCpuFM68k.execinfo&FM68K_HALTED)
 #define SekIsStoppedS68k() (PicoCpuFS68k.execinfo&FM68K_HALTED)
 #define SekShouldInterrupt fm68k_would_interrupt()
 
@@ -117,6 +119,7 @@ extern m68ki_cpu_core PicoCpuMM68k, PicoCpuMS68k;
 	if(x) { SET_CYCLES(0); PicoCpuMS68k.stopped=STOP_LEVEL_STOP; } \
 	else PicoCpuMS68k.stopped=0; \
 }
+#define SekIsStoppedM68k() (PicoCpuMM68k.stopped==STOP_LEVEL_STOP)
 #define SekIsStoppedS68k() (PicoCpuMS68k.stopped==STOP_LEVEL_STOP)
 #define SekShouldInterrupt (CPU_INT_LEVEL > FLAG_INT_MASK)
 
@@ -235,13 +238,13 @@ extern SH2 sh2s[2];
 
 #ifndef DRC_SH2
 # define ash2_end_run(after) if (sh2->icount > (after)) sh2->icount = after
-# define ash2_cycles_done() (sh2->cycles_aim - sh2->icount)
+# define ash2_cycles_done() (sh2->cycles_timeslice - sh2->icount)
 #else
 # define ash2_end_run(after) { \
    if ((sh2->sr >> 12) > (after)) \
      { sh2->sr &= 0xfff; sh2->sr |= (after) << 12; } \
 }
-# define ash2_cycles_done() (sh2->cycles_aim - (sh2->sr >> 12))
+# define ash2_cycles_done() (sh2->cycles_timeslice - (sh2->sr >> 12))
 #endif
 
 //#define sh2_pc(c)     (c) ? ssh2.ppc : msh2.ppc
@@ -712,6 +715,7 @@ void PicoReset32x(void);
 void Pico32xStartup(void);
 void PicoUnload32x(void);
 void PicoFrame32x(void);
+void p32x_sync_sh2s(unsigned int m68k_target);
 void p32x_update_irls(int nested_call);
 void p32x_reset_sh2s(void);
 
diff --git a/pico/state.c b/pico/state.c
index cb340695..6b91d484 100644
--- a/pico/state.c
+++ b/pico/state.c
@@ -333,6 +333,8 @@ static int state_save(void *file)
     CHECKED_WRITE_BUFF(CHUNK_SDRAM,     Pico32xMem->sdram);
     CHECKED_WRITE_BUFF(CHUNK_DRAM,      Pico32xMem->dram);
     CHECKED_WRITE_BUFF(CHUNK_32XPAL,    Pico32xMem->pal);
+
+    sh2s[0].m68krcycles_done = sh2s[1].m68krcycles_done = SekCycleCnt;
   }
 #endif
 
@@ -690,4 +692,4 @@ void PicoTmpStateRestore(void *data)
 #endif
 }
 
-// vim:shiftwidth=2:expandtab
+// vim:shiftwidth=2:ts=2:expandtab
diff --git a/platform/common/config_file.c b/platform/common/config_file.c
index 28bb263a..f058fbd8 100644
--- a/platform/common/config_file.c
+++ b/platform/common/config_file.c
@@ -319,11 +319,14 @@ static int custom_read(menu_entry *me, const char *var, const char *val)
 			return 1;
 
 		case MA_32XOPT_MSH2_CYCLES:
-		case MA_32XOPT_SSH2_CYCLES: {
-			int *mul = (me->id == MA_32XOPT_MSH2_CYCLES) ? &p32x_msh2_multiplier : &p32x_ssh2_multiplier;
-			*mul = ((unsigned int)atoi(val) << SH2_MULTI_SHIFT) / 7670;
+			currentConfig.msh2_khz = atoi(val);
+			Pico32xSetClocks(currentConfig.msh2_khz * 1000, 0);
+			return 1;
+
+		case MA_32XOPT_SSH2_CYCLES:
+			currentConfig.ssh2_khz = atoi(val);
+			Pico32xSetClocks(0, currentConfig.ssh2_khz * 1000);
 			return 1;
-		}
 
 		/* PSP */
 		case MA_OPT3_SCALE:
diff --git a/platform/common/emu.c b/platform/common/emu.c
index e7a99129..22da55c8 100644
--- a/platform/common/emu.c
+++ b/platform/common/emu.c
@@ -548,6 +548,8 @@ void emu_prep_defconfig(void)
 	defaultConfig.gamma = 100;
 	defaultConfig.scaling = 0;
 	defaultConfig.turbo_rate = 15;
+	defaultConfig.msh2_khz = PICO_MSH2_HZ / 1000;
+	defaultConfig.ssh2_khz = PICO_SSH2_HZ / 1000;
 
 	// platform specific overrides
 	pemu_prep_defconfig();
@@ -561,8 +563,6 @@ void emu_set_defconfig(void)
 	PicoRegionOverride = currentConfig.s_PicoRegion;
 	PicoAutoRgnOrder = currentConfig.s_PicoAutoRgnOrder;
 	PicoCDBuffers = currentConfig.s_PicoCDBuffers;
-	p32x_msh2_multiplier = MSH2_MULTI_DEFAULT;
-	p32x_ssh2_multiplier = SSH2_MULTI_DEFAULT;
 }
 
 int emu_read_config(const char *rom_fname, int no_defaults)
diff --git a/platform/common/emu.h b/platform/common/emu.h
index 4d2f17d4..89c886f7 100644
--- a/platform/common/emu.h
+++ b/platform/common/emu.h
@@ -80,6 +80,8 @@ typedef struct _currentConfig_t {
 	int renderer32x;
 	int filter; // pandora
 	int analog_deadzone;
+	int msh2_khz;
+	int ssh2_khz;
 } currentConfig_t;
 
 extern currentConfig_t currentConfig, defaultConfig;
diff --git a/platform/common/menu_pico.c b/platform/common/menu_pico.c
index 2dd582dc..e3a6de3d 100644
--- a/platform/common/menu_pico.c
+++ b/platform/common/menu_pico.c
@@ -444,26 +444,28 @@ static int menu_loop_cd_options(int id, int keys)
 // convert from multiplier of VClk
 static int mh_opt_sh2cycles(int id, int keys)
 {
-	int *mul = (id == MA_32XOPT_MSH2_CYCLES) ? &p32x_msh2_multiplier : &p32x_ssh2_multiplier;
+	int *khz = (id == MA_32XOPT_MSH2_CYCLES) ?
+		&currentConfig.msh2_khz : &currentConfig.ssh2_khz;
 
 	if (keys & (PBTN_LEFT|PBTN_RIGHT))
-		*mul += (keys & PBTN_LEFT) ? -10 : 10;
+		*khz += (keys & PBTN_LEFT) ? -50 : 50;
 	if (keys & (PBTN_L|PBTN_R))
-		*mul += (keys & PBTN_L) ? -100 : 100;
+		*khz += (keys & PBTN_L) ? -500 : 500;
 
-	if (*mul < 1)
-		*mul = 1;
-	else if (*mul > (10 << SH2_MULTI_SHIFT))
-		*mul = 10 << SH2_MULTI_SHIFT;
+	if (*khz < 1)
+		*khz = 1;
+	else if (*khz > 0x7fffffff / 1000)
+		*khz = 0x7fffffff / 1000;
 
 	return 0;
 }
 
 static const char *mgn_opt_sh2cycles(int id, int *offs)
 {
-	int mul = (id == MA_32XOPT_MSH2_CYCLES) ? p32x_msh2_multiplier : p32x_ssh2_multiplier;
-	
-	sprintf(static_buff, "%d", 7670 * mul >> SH2_MULTI_SHIFT);
+	int khz = (id == MA_32XOPT_MSH2_CYCLES) ?
+		currentConfig.msh2_khz : currentConfig.ssh2_khz;
+
+	sprintf(static_buff, "%d", khz);
 	return static_buff;
 }
 
@@ -490,6 +492,8 @@ static int menu_loop_32x_options(int id, int keys)
 	me_enable(e_menu_32x_options, MA_32XOPT_RENDERER, renderer_names32x[0] != NULL);
 	me_loop(e_menu_32x_options, &sel);
 
+	Pico32xSetClocks(currentConfig.msh2_khz * 1000, currentConfig.ssh2_khz * 1000);
+
 	return 0;
 }
 
diff --git a/platform/libretro.c b/platform/libretro.c
index a6bc50f0..ecf6a293 100644
--- a/platform/libretro.c
+++ b/platform/libretro.c
@@ -609,9 +609,6 @@ void retro_init(void)
 	PicoAutoRgnOrder = 0x184; // US, EU, JP
 	PicoCDBuffers = 0;
 
-	p32x_msh2_multiplier = MSH2_MULTI_DEFAULT;
-	p32x_ssh2_multiplier = SSH2_MULTI_DEFAULT;
-
 	vout_width = 320;
 	vout_height = 240;
 	vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2);
-- 
2.39.5