/* ldr and str */
/*
 * Conditional word/byte loads with an immediate offset.
 * The second EOP_C_AM2_IMM argument selects whether the offset is added (1)
 * or subtracted (0) (cf. EOP_LDR_NEGIMM); it is derived from the sign of
 * offset_12 and the magnitude is passed separately, so negative offsets are
 * handled the same way as in EOP_STR_IMM2.
 * Note: offset_12 is evaluated twice, so it must be free of side effects.
 */
#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,abs(offset_12))
#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,abs(offset_12))
+#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,abs(offset_12))
#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12)
#define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12)
#define emith_add_r_r(d, s) \
emith_add_r_r_r(d, d, s)
+#define emith_add_r_r_ptr(d, s) \
+ emith_add_r_r_r(d, d, s)
+
#define emith_sub_r_r(d, s) \
EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0)
// misc
#define emith_read_r_r_offs_c(cond, r, rs, offs) \
EOP_LDR_IMM2(cond, r, rs, offs)
+#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
+ emith_read_r_r_offs_c(cond, r, rs, offs)
#define emith_read_r_r_r_c(cond, r, rs, rm) \
EOP_LDR_REG_LSL(cond, r, rs, rm, 0)
#define emith_read_r_r_r(r, rs, rm) \
#define emith_read16_r_r_offs(r, rs, offs) \
emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs)
+#define emith_write_r_r_offs_c(cond, r, rs, offs) /* conditional store of r to [rs + offs] via EOP_STR_IMM2 */ \
+ EOP_STR_IMM2(cond, r, rs, offs)
+#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) /* pointer-sized variant; expands to the plain store on this target */ \
+ emith_write_r_r_offs_c(cond, r, rs, offs)
+
+#define emith_ctx_read_c(cond, r, offs) \
+ emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs)
#define emith_ctx_read(r, offs) \
- emith_read_r_r_offs(r, CONTEXT_REG, offs)
+ emith_ctx_read_c(A_COND_AL, r, offs)
#define emith_ctx_read_ptr(r, offs) \
emith_ctx_read(r, offs)
EMIT_OP_MODRM(0x01, 3, s, d)
#define emith_add_r_r_ptr(d, s) do { \
- EMIT_REX_IF(1, dst, src); \
+ EMIT_REX_IF(1, s, d); \
EMIT_OP_MODRM64(0x01, 3, s, d); \
} while (0)
} \
} while (0)
+// _r_r_r_shift
+#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { /* emit d = s1 + (s2 shifted left by lslimm) */ \
+ int tmp_ = rcache_get_tmp(); /* temporary from the register cache holds the shifted operand */ \
+ emith_lsl(tmp_, s2, lslimm); \
+ emith_add_r_r_r(d, s1, tmp_); \
+ rcache_free_tmp(tmp_); /* hand the temporary back once the add has been emitted */ \
+} while (0)
+
+#define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { /* emit d = s1 + (s2 shifted right by lsrimm, via emith_lsr) */ \
+ int tmp_ = rcache_get_tmp(); /* temporary from the register cache holds the shifted operand */ \
+ emith_lsr(tmp_, s2, lsrimm); \
+ emith_add_r_r_r(d, s1, tmp_); \
+ rcache_free_tmp(tmp_); /* hand the temporary back once the add has been emitted */ \
+} while (0)
+
// _r_r_shift
#define emith_or_r_r_lsl(d, s, lslimm) do { \
int tmp_ = rcache_get_tmp(); \
#define emith_read_r_r_offs_c(cond, r, rs, offs) \
emith_read_r_r_offs(r, rs, offs)
+#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \
+ emith_read_r_r_offs_ptr(r, rs, offs)
#define emith_write_r_r_offs_c(cond, r, rs, offs) \
emith_write_r_r_offs(r, rs, offs)
+#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \
+ emith_write_r_r_offs_ptr(r, rs, offs)
#define emith_read8_r_r_offs_c(cond, r, rs, offs) \
emith_read8_r_r_offs(r, rs, offs)
#define emith_write8_r_r_offs_c(cond, r, rs, offs) \
#define emith_read_r_r_offs(r, rs, offs) \
emith_deref_op(0x8b, r, rs, offs)
+#define emith_read_r_r_offs_ptr(r, rs, offs) do { /* emith_read_r_r_offs (opcode 0x8b) preceded by a REX prefix */ \
+ EMIT_REX_IF(1, r, rs); \
+ emith_deref_op(0x8b, r, rs, offs); \
+} while (0) /* single statement: prefix and load stay together even as an unbraced if/else body */
#define emith_write_r_r_offs(r, rs, offs) \
emith_deref_op(0x89, r, rs, offs)
+#define emith_write_r_r_offs_ptr(r, rs, offs) do { /* emith_write_r_r_offs (opcode 0x89) preceded by a REX prefix */ \
+ EMIT_REX_IF(1, r, rs); \
+ emith_deref_op(0x89, r, rs, offs); \
+} while (0) /* single statement: prefix and store stay together even as an unbraced if/else body */
// note: don't use prefixes on this
#define emith_read8_r_r_offs(r, rs, offs) do { \
#define emith_ctx_read(r, offs) \
emith_read_r_r_offs(r, CONTEXT_REG, offs)
+#define emith_ctx_read_c(cond, r, offs) \
+ emith_ctx_read(r, offs)
#define emith_ctx_read_ptr(r, offs) do { \
EMIT_REX_IF(1, r, CONTEXT_REG); \
// features
#define PROPAGATE_CONSTANTS 1
#define LINK_BRANCHES 1
+#define BRANCH_CACHE 1
#define ALIAS_REGISTERS 1
#define REMAP_REGISTER 1
// 10 - smc self-check
// 100 - write trace
// 200 - compare trace
-// 400 - print block entry backtrace
+// 400 - block entry backtrace on exit
+// 800 - state dump on exit
// {
#ifndef DRC_DEBUG
// Debugging stays off unless DRC_DEBUG is defined before this point
// (e.g. 0x800 to enable the state dump listed above).
#define DRC_DEBUG 0
#endif
#if DRC_DEBUG
#define do_host_disasm(x)
#endif
-#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
-
#define SH2_DUMP(sh2, reason) { \
char ms = (sh2)->is_slave ? 's' : 'm'; \
printf("%csh2 %s %08x\n", ms, reason, (sh2)->pc); \
(sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \
(sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \
}
+
+#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB)
static SH2 csh2[2][4];
static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr)
{
memset(Pico32xMem->drcblk_da[tcid - 1], 0,
sizeof(Pico32xMem->drcblk_da[0]));
}
+#if BRANCH_CACHE
+ if (tcid)
+ memset32(sh2s[tcid-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
+ else {
+ memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
+ memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
+ }
+#endif
#if (DRC_DEBUG & 4)
tcache_dsm_ptrs[tcid] = tcache_bases[tcid];
#endif
// sh2_drc_dispatcher(void)
sh2_drc_dispatcher = (void *)tcache_ptr;
- sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL);
- emith_cmp_r_imm(sr, 0);
- emith_jump_cond(DCOND_LT, sh2_drc_exit);
- rcache_invalidate();
emith_ctx_read(arg0, SHR_PC * 4);
+#if BRANCH_CACHE
+ // check if PC is in branch target cache
+ emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*4);
+ // TODO implement emith_add_r_r_r_lsl_ptr, saves one insn on 32bit ARM
+ emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1);
+ emith_add_r_r_ptr(arg1, CONTEXT_REG);
+ emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache));
+ emith_cmp_r_r(arg2, arg0);
+ EMITH_SJMP_START(DCOND_NE);
+ emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
+ emith_jump_reg_c(DCOND_EQ, RET_REG);
+ EMITH_SJMP_END(DCOND_NE);
+#endif
emith_ctx_read(arg1, offsetof(SH2, is_slave));
emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp));
emith_call(dr_lookup_block);
+#if BRANCH_CACHE
+ // store PC and block entry ptr (in RET_REG) in branch target cache
+ emith_tst_r_r_ptr(RET_REG, RET_REG);
+ EMITH_SJMP_START(DCOND_EQ);
+ emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4);
+ emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*4);
+ emith_lsl(arg1, arg1, sizeof(void *) == 8 ? 2 : 1);
+ emith_add_r_r_ptr(arg1, CONTEXT_REG);
+ emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache));
+ emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *));
+ EMITH_SJMP_END(DCOND_EQ);
+#endif
emit_block_entry();
// lookup failed, call sh2_translate()
emith_move_r_r_ptr(arg0, CONTEXT_REG);
bd->addr = bd->size = bd->size_nolit = 0;
bd->entry_count = 0;
+
+#if BRANCH_CACHE
+ if (tcache_id)
+ memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
+ else {
+ memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4);
+ memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4);
+ }
+#endif
}
/*
return ret_cycles;
}
-#if (DRC_DEBUG & 2)
-void block_stats(void)
+static void block_stats(void)
{
+#if (DRC_DEBUG & 2)
int c, b, i, total = 0;
printf("block stats:\n");
for (b = 0; b < ARRAY_SIZE(block_tables); b++)
for (i = 0; i < block_counts[b]; i++)
block_tables[b][i].refcount = 0;
-}
-#else
-#define block_stats()
#endif
+}
-void sh2_drc_flush_all(void)
+static void backtrace(void)
{
#if (DRC_DEBUG & 1024)
int i;
for (i = 0; i < ARRAY_SIZE(csh2[1]); i++)
SH2_DUMP(&csh2[1][i], "bt ssh2");
#endif
+}
+
+#if (DRC_DEBUG & 2048)
+// Dump the debug state of one SH2: the SH2_DUMP summary, 0x60 words starting
+// at VBR, 0x30 words on either side of r15, and the PC tags of all branch
+// cache entries.
+//   name - label for SH2_DUMP and the branch cache header ("master"/"slave")
+//   tag  - short CPU name for the VBR and stack headers ("msh2"/"ssh2")
+static void state_dump_cpu(SH2 *sh2, const char *name, const char *tag)
+{
+ int i;
+
+ SH2_DUMP(sh2, name);
+ printf("VBR %s: %x\n", tag, sh2->vbr);
+ for (i = 0; i < 0x60; i++) {
+ printf("%08x ",p32x_sh2_read32(sh2->vbr + i*4, sh2));
+ if ((i+1) % 8 == 0) printf("\n"); // 8 words per output line
+ }
+ printf("stack %s: %x\n", tag, sh2->r[15]);
+ for (i = -0x30; i < 0x30; i++) {
+ printf("%08x ",p32x_sh2_read32(sh2->r[15] + i*4, sh2));
+ if ((i+1) % 8 == 0) printf("\n");
+ }
+ printf("branch cache %s:\n", name);
+ for (i = 0; i < ARRAY_SIZE(sh2->branch_cache); i++) {
+ printf("%08x ",sh2->branch_cache[i].pc);
+ if ((i+1) % 8 == 0) printf("\n");
+ }
+}
+#endif
+
+// Print the state of both SH2s (master first, then slave) when debug flag
+// 0x800 is set in DRC_DEBUG; otherwise this is an empty function.
+static void state_dump(void)
+{
+#if (DRC_DEBUG & 2048)
+ state_dump_cpu(&sh2s[0], "master", "msh2");
+ state_dump_cpu(&sh2s[1], "slave", "ssh2");
+#endif
+}
+
+void sh2_drc_flush_all(void)
+{
+ backtrace();
+ state_dump();
block_stats();
flush_tcache(0);
flush_tcache(1);
int poll_cycles;\r
int poll_cnt;\r
\r
+ // DRC branch cache. size must be 2^n and <=128\r
+ struct { unsigned int pc; void *code; } branch_cache[128];\r
+\r
// interpreter stuff\r
int icount; // cycles left in current timeslice\r
unsigned int ea;\r