+ if (p32x_sh2_ready(sh2->other_sh2, cycles-250))
+ p32x_sync_other_sh2(sh2, cycles);
+}
+
+ // poll fifo, stores writes to potential addresses used for polling.
+ // This is used to correctly deliver syncronisation data to the 3 cpus. The
+ // fifo stores 16 bit values, 8/32 bit accesses must be adapted accordingly.
+ #define PFIFO_SZ 4   // entries per ringbuffer (power of 2, so % is cheap)
+ #define PFIFO_CNT 8  // number of ringbuffers, selected by (address >> 1) % PFIFO_CNT
+ struct sh2_poll_fifo {
+   u32 cycles;        // timestamp of the write (same cycle domain as the readers)
+   u32 a;             // write address, writethrough bit stripped; -1 marks a cancelled entry
+   u16 d;             // 16-bit data value written
+   int cpu;           // writer id: sh2->is_slave for SH2s, -1 when no SH2 (presumably the 68k)
+ } sh2_poll_fifo[PFIFO_CNT][PFIFO_SZ];
+ unsigned sh2_poll_rd[PFIFO_CNT], sh2_poll_wr[PFIFO_CNT]; // ringbuffer pointers
+
+ // Deliver pending cross-cpu data for a polled read.
+ // a: read address, d: value just read from memory, cycles: current timestamp,
+ // sh2: reading cpu (NULL when there is no SH2 context, cpu id -1).
+ // Returns d, possibly replaced by a matching value another cpu stored in the
+ // poll fifo. Entries are consumed by marking a = -1; the read pointer itself
+ // is only advanced by sh2_poll_write.
+ static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2)
+ {
+   int hix = (a >> 1) % PFIFO_CNT;   // pick the ringbuffer for this address
+   struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix];
+   struct sh2_poll_fifo *p;
+   int cpu = sh2 ? sh2->is_slave : -1;
+   unsigned idx;
+ 
+   a &= ~0x20000000; // ignore writethrough bit
+   // fetch oldest write to address from fifo, but stop when reaching the present
+   idx = sh2_poll_rd[hix];
+   while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) {
+     p = &fifo[idx];
+     idx = (idx+1) % PFIFO_SZ;
+ 
+     // only data stored by *other* cpus is of interest; own writes were
+     // already seen by the plain memory read that produced d
+     if (cpu != p->cpu) {
+       if (CYCLES_GT(cycles, p->cycles+60)) { // ~180 sh2 cycles, Spiderman
+         // drop older fifo stores that may cause synchronisation problems.
+         p->a = -1;
+       } else if (p->a == a) {
+         // replace current data with fifo value and discard fifo entry
+         d = p->d;
+         p->a = -1;
+         break;  // deliver only one store per read to keep ordering intact
+       }
+     }
+   }
+   return d;
+ }
+
+ // Record a write to a potential polling address so other cpus can pick the
+ // value up, in order, via sh2_poll_read.
+ // a: write address, d: 16-bit data, cycles: current timestamp,
+ // sh2: writing cpu (NULL when there is no SH2 context, cpu id -1).
+ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2)
+ {
+   int hix = (a >> 1) % PFIFO_CNT;   // pick the ringbuffer for this address
+   struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix];
+   struct sh2_poll_fifo *q;
+   int cpu = sh2 ? sh2->is_slave : -1;
+   unsigned rd = sh2_poll_rd[hix], wr = sh2_poll_wr[hix];
+   unsigned idx, nrd;
+ 
+   a &= ~0x20000000; // ignore writethrough bit
+ 
+   // throw out any values written by other cpus, plus heading cancelled stuff
+   // (scan newest->oldest; nrd ends at the oldest still-live entry, so any
+   // cancelled entries at the head are skipped when rd is updated below)
+   for (idx = nrd = wr; idx != rd; ) {
+     idx = (idx-1) % PFIFO_SZ;   // unsigned wraparound % power-of-2 is safe here
+     q = &fifo[idx];
+     if (q->a == a && q->cpu != cpu) { q->a = -1; }  // superseded by this write
+     if (q->a != -1) { nrd = idx; }
+   }
+   rd = nrd;
+ 
+   // fold 2 consecutive writes to the same address to avoid reading of
+   // intermediate values that may cause synchronisation problems.
+   // NB this can take an eternity on m68k: mov.b <addr1.l>,<addr2.l> needs
+   // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head)
+   q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ];   // newest entry in the fifo
+   // non-SH2 writers (cpu < 0) get the larger 30-cycle fold window
+   if (rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles + (cpu<0 ? 30:4))) {
+     q->d = d;
+   } else {
+     // store write to poll address in fifo
+     fifo[wr] =
+         (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu };
+     wr = (wr+1) % PFIFO_SZ;
+     if (wr == rd)
+       // fifo overflow, discard oldest value
+       rd = (rd+1) % PFIFO_SZ;
+   }
+ 
+   sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr;
+ }
+
+ // 8-bit poll-aware read: route through the 16-bit handler on the aligned
+ // address, placing/extracting the byte on its lane within the 16-bit word
+ // (even address = high byte), then sign-extend the result.
+ u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2)
+ {
+   int lane = (a & 1) ? 0 : 8;
+   u32 word = p32x_sh2_poll_memory16(a & ~1, d << lane, sh2);
+   return (u32)(s8)(word >> lane);
+ }
+
+ // 16-bit poll-aware SDRAM read handler. If the accessed address is flagged
+ // (0x80 in the drc block map) as a synchronisation address, sync the other
+ // cpus and pull any pending cross-cpu data from the poll fifo; the access is
+ // always fed to p32x_sh2_poll_detect for read-poll detection (SH2_STATE_RPOLL).
+ // Returns the sign-extended 16-bit value.
+ u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2)
+ {
+   unsigned char *p = sh2->p_drcblk_ram;
+   unsigned int cycles;
+ 
+   DRC_SAVE_SR(sh2);   // must bracket anything that may leave drc context
+   // is this a synchronisation address?
+   if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) {
+     cycles = sh2_cycles_done_m68k(sh2);
+     sh2s_sync_on_read(sh2, cycles);
+     // check poll fifo and sign-extend the result correctly
+     d = (s16)sh2_poll_read(a, d, cycles, sh2);
+   }
+ 
+   p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 7);
+ 
+   DRC_RESTORE_SR(sh2);
+   return d;
+ }
+
+u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2)
+{
+ unsigned char *p = sh2->p_drcblk_ram;
+ unsigned int cycles;
+
+ DRC_SAVE_SR(sh2);
+ // is this a synchronisation address?
+ if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) {
+ cycles = sh2_cycles_done_m68k(sh2);
+ sh2s_sync_on_read(sh2, cycles);
+ // check poll fifo and sign-extend the result correctly
+ d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) |
+ ((u16)sh2_poll_read(a+2, d, cycles, sh2));
+ }
+
+ p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 7);
+
+ DRC_RESTORE_SR(sh2);
+ return d;