/* ARM data-processing opcodes (insn bits 24:21) */
#define A_OP_TST 0x8
#define A_OP_TEQ 0x9
#define A_OP_CMP 0xa
#define A_OP_CMN 0xb	/* was wrongly 0xa, colliding with CMP */
#define A_OP_ORR 0xc
#define A_OP_MOV 0xd
#define A_OP_BIC 0xe
/* MOVT rd, #imm16 (ARMv7): write imm[31:16] into the top half of rd,
 * low half unchanged. Encoding 0xe3400000: imm12 = imm[27:16] at insn
 * bits 11:0, imm4 = imm[31:28] at bits 19:16, rd at bits 15:12. */
#define EOP_MOVT(rd,imm) \
	EMIT(0xe3400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000))
/* population count: number of set bits in val.
 * Classic parallel bit-summing: pairs, nibbles, bytes, halfwords. */
static int count_bits(unsigned val)
{
	val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
	val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
	val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
	val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
	return (val & 0xffff) + (val >> 16);
}
+// XXX: RSB, *S will break if 1 insn is not enough
static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm)
{
int ror2;
switch (op) {
case A_OP_MOV:
rn = 0;
- if (~imm < 0x10000) {
+ // count bits in imm and use MVN if more bits 1 than 0
+ if (count_bits(imm) > 16) {
imm = ~imm;
op = A_OP_MVN;
}
-#ifdef HAVE_ARMV7
- for (v = imm, ror2 = 0; v && !(v & 3); v >>= 2)
- ror2--;
- if (v >> 8) {
- /* 2+ insns needed - prefer movw/movt */
- if (op == A_OP_MVN)
- imm = ~imm;
- EOP_MOVW(rd, imm);
- if (imm & 0xffff0000)
- EOP_MOVT(rd, imm);
- return;
- }
-#endif
break;
case A_OP_EOR:
case A_OP_ADD:
case A_OP_ORR:
case A_OP_BIC:
- if (s == 0 && imm == 0)
+ if (s == 0 && imm == 0 && rd == rn)
return;
break;
}
- for (v = imm, ror2 = 0; ; ror2 -= 8/2) {
- /* shift down to get 'best' rot2 */
- for (; v && !(v & 3); v >>= 2)
- ror2--;
-
- EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0x0f, v & 0xff);
-
- v >>= 8;
- if (v == 0)
- break;
- if (op == A_OP_MOV)
- op = A_OP_ORR;
- if (op == A_OP_MVN)
+ again:
+ v = imm, ror2 = 32/2; // arm imm shift is ROR, so rotate for best fit
+ while ((v >> 24) && !(v & 0xc0))
+ v = (v << 2) | (v >> 30), ror2++;
+ do {
+ // shift down to get 'best' rot2
+ while (v > 0xff && !(v & 3))
+ v >>= 2, ror2--;
+ // AND must fit into 1 insn. if not, use BIC
+ if (op == A_OP_AND && v != (v & 0xff)) {
+ imm = ~imm;
op = A_OP_BIC;
+ goto again;
+ }
+ EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff);
+
+ switch (op) {
+ case A_OP_MOV: op = A_OP_ORR; break;
+ case A_OP_MVN: op = A_OP_BIC; break;
+ case A_OP_ADC: op = A_OP_ADD; break;
+ case A_OP_SBC: op = A_OP_SUB; break;
+ }
rn = rd;
- }
+
+ v >>= 8, ror2 -= 8/2;
+ } while (v);
}
#define emith_op_imm(cond, s, op, r, imm) \
/* compare r against imm; a small negative immediate is rewritten as
 * CMN (compare-negative) so the negated value fits the ARM imm field.
 * Bug fix: the emit call must use imm_, not imm — otherwise the
 * negation above is discarded and CMN compares the wrong value. */
#define emith_cmp_r_imm(r, imm) { \
	u32 op = A_OP_CMP, imm_ = imm; \
	if (~imm_ < 0x100) { \
		imm_ = -imm_; \
		op = A_OP_CMN; \
	} \
	emith_top_imm(A_COND_AL, op, r, imm_); \
}
if ((count) <= 8) { \
t = (count) - 8; \
t = (0xff << t) & 0xff; \
- EOP_BIC_IMM(d,s,8/2,t); \
EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \
} else if ((count) >= 24) { \
t = (count) - 24; \
t = 0xff >> t; \
- EOP_AND_IMM(d,s,0,t); \
EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \
} else { \
EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \
rmr = s2; \
} \
EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \
- /* XXX: using push/pop for the case of edx->eax; eax->edx */ \
- if (dhi != xDX && dhi != -1) \
- emith_push(xDX); \
if (dlo != xAX) \
- emith_move_r_r(dlo, xAX); \
- if (dhi != xDX && dhi != -1) \
- emith_pop(dhi); \
+ EMIT_OP(0x90 + (dlo)); /* XCHG eax, dlo */ \
+ if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \
+ emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \
if (dlo != xDX && dhi != xDX) \
emith_pop(xDX); \
if (dlo != xAX && dhi != xAX) \
#define emith_deref_op(op, r, rs, offs) do { \
	/* mov r <-> [rs+#offs]; disp8 is sign-extended, so it only covers */ \
	/* [-0x80,0x7f] — use abs() so negative offsets pick disp32 correctly */ \
	if (abs(offs) >= 0x80) { \
		EMIT_OP_MODRM64(op, 2, r, rs); \
		EMIT(offs, u32); \
	} else { \
		EMIT_OP_MODRM64(op, 1, r, rs); \
		EMIT((u8)offs, u8); \
	} \
} while (0)
int r_ = r; \
if (!is_abcdx(r)) \
r_ = rcache_get_tmp(); \
- emith_deref_op(0x8a, r_, rs, offs); \
+ EMIT(0x0f, u8); \
+ emith_deref_op(0xb6, r_, rs, offs); \
if ((r) != r_) { \
emith_move_r_r(r, r_); \
rcache_free_tmp(r_); \
} while (0)
/* 16-bit read, zero-extended: MOVZX (0f b7) clears the upper register
 * half, unlike a 66-prefixed MOV which would leave it stale */
#define emith_read16_r_r_offs(r, rs, offs) do { \
	EMIT(0x0f, u8); \
	emith_deref_op(0xb7, r, rs, offs); \
} while (0)
#define emith_write16_r_r_offs(r, rs, offs) do { \
case 0: rd = xDI; break; \
case 1: rd = xSI; break; \
case 2: rd = xDX; break; \
+ case 2: rd = xBX; break; \
}
#define emith_sh2_drc_entry() { \
// do this to avoid missing irqs that other SH2 might clear
int vector = sh2->irq_callback(sh2, level);
sh2_do_irq(sh2, level, vector);
- sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, 13);
+ sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, 13);
}
else
sh2->test_irq = 1;
\r
#define CYCLE_MULT_SHIFT 10
/* m68k <-> sh2 cycle conversion using the fixed-point multipliers stored
 * in *xsh2; 64-bit intermediate avoids overflow for large cycle counts.
 * Note ((c)+3): the argument must be parenthesized, else e.g. c = x<<1
 * would parse as x << (1+3). */
#define C_M68K_TO_SH2(xsh2, c) \
	((int)((long long)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT)
#define C_SH2_TO_M68K(xsh2, c) \
	((int)((long long)((c)+3) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT)
\r
int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2);\r
void sh2_finish(SH2 *sh2);\r
}
p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VINT);
- p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0);
- p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0);
+ p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, SekCyclesDone());
+ p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone());
}
void p32x_schedule_hint(SH2 *sh2, int m68k_cycles)
p32x_event_schedule(now, event, after);
- left_to_next = (event_time_next - now) * 3;
- sh2_end_run(sh2, left_to_next);
+ left_to_next = C_M68K_TO_SH2(sh2, (int)(event_time_next - now));
+ if (sh2_cycles_left(sh2) > left_to_next) {
+ if (left_to_next < 1)
+ left_to_next = 1;
+ sh2_end_run(sh2, left_to_next);
+ }
}
static void p32x_run_events(unsigned int until)
pevt_log_sh2_o(sh2, EVT_RUN_START);
sh2->state |= SH2_STATE_RUN;
- cycles = C_M68K_TO_SH2(*sh2, m68k_cycles);
+ cycles = C_M68K_TO_SH2(sh2, m68k_cycles);
elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x",
sh2->m68krcycles_done, cycles, sh2->pc);
done = sh2_execute(sh2, cycles, PicoIn.opt & POPT_EN_DRC);
- sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, done);
+ sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, done);
sh2->state &= ~SH2_STATE_RUN;
pevt_log_sh2_o(sh2, EVT_RUN_END);
elprintf_sh2(sh2, EL_32X, "-run %u %d",
// there might be new event to schedule current sh2 to
if (event_time_next) {
- left_to_event = event_time_next - m68k_target;
- left_to_event *= 3;
+ left_to_event = C_M68K_TO_SH2(sh2, (int)(event_time_next - m68k_target));
if (sh2_cycles_left(sh2) > left_to_event) {
if (left_to_event < 1)
left_to_event = 1;
now = ssh2.m68krcycles_done;
timer_cycles = now;
+ pprof_start(m68k);
while (CYCLES_GT(m68k_target, now))
{
if (event_time_next && CYCLES_GE(now, event_time_next))
target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done,
m68k_target - now, Pico32x.emu_flags);
+ pprof_start(ssh2);
if (!(ssh2.state & SH2_IDLE_STATES)) {
cycles = target - ssh2.m68krcycles_done;
if (cycles > 0) {
target = event_time_next;
}
}
+ pprof_end(ssh2);
+ pprof_start(msh2);
if (!(msh2.state & SH2_IDLE_STATES)) {
cycles = target - msh2.m68krcycles_done;
if (cycles > 0) {
target = event_time_next;
}
}
+ pprof_end(msh2);
now = target;
if (!(msh2.state & SH2_IDLE_STATES)) {
p32x_timers_do(now - timer_cycles);
timer_cycles = now;
}
+ pprof_end_sub(m68k);
// advance idle CPUs
if (msh2.state & SH2_IDLE_STATES) {
if (!(Pico32x.sh2_regs[0] & 0x80))
p32x_schedule_hint(NULL, SekCyclesDone());
- p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0);
- p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0);
+ p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, SekCyclesDone());
+ p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, SekCyclesDone());
if (PicoIn.AHW & PAHW_MCD)
pcd_prepare_frame();
cycles = sh2_cycles_done(sh2);
if (cycles > 600)
- p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + cycles / 3);
+ p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles));
}
// SH2 faking
if (SekShouldInterrupt())
Pico_mcd->m.s68k_poll_a = 0;
+ pprof_start(s68k);
SekCycleCntS68k += cyc_do;
#if defined(EMU_C68K)
PicoCpuCS68k.cycles = cyc_do;
#elif defined(EMU_F68K)
SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, cyc_do, 0) - cyc_do;
#endif
+ pprof_end(s68k);
}
static void pcd_set_cycle_mult(void)
# define sh2_pc(sh2) (sh2)->pc\r
#endif\r
\r
/* cycles the sh2 has consumed in the current timeslice (sh2 clock) */
#define sh2_cycles_done(sh2) ((unsigned)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))
/* total sh2 cycles elapsed: previous timeslices (converted from the m68k
 * tally) plus the current one — uses the conversion macros instead of a
 * hard-coded 3:1 clock ratio */
#define sh2_cycles_done_t(sh2) \
	(unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2))
/* sh2 progress expressed in m68k cycles */
#define sh2_cycles_done_m68k(sh2) \
	(unsigned)((sh2)->m68krcycles_done + C_SH2_TO_M68K(sh2, sh2_cycles_done(sh2)))
\r
/* select a register / gbr from the ssh2 (c nonzero) or msh2 instance.
 * Bodies are parenthesized: otherwise any neighboring operator of higher
 * precedence (e.g. 2 * sh2_reg(c, x)) binds into the ternary's false arm. */
#define sh2_reg(c, x) ((c) ? ssh2.r[x] : msh2.r[x])
#define sh2_gbr(c)    ((c) ? ssh2.gbr : msh2.gbr)