* notes:
* - tcache, block descriptor, link buffer overflows result in sh2_translate()
* failure, followed by full tcache invalidation for that region
- * - jumps between blocks are tracked for SMC handling (in block_links[]),
+ * - jumps between blocks are tracked for SMC handling (in block_entry->links),
* except jumps between different tcaches
*
* implemented:
#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 8)
+struct block_link {
+ u32 target_pc;
+ void *jump; // insn address
+ struct block_link *next; // either in block_entry->links or unresolved_links
+};
+
struct block_entry {
u32 pc;
void *tcache_ptr; // translated block for above PC
struct block_entry *next; // next block in hash_table with same pc hash
+ struct block_link *links; // links to this entry
#if (DRC_DEBUG & 2)
struct block_desc *block;
#endif
struct block_entry entryp[MAX_BLOCK_ENTRIES];
};
-struct block_link {
- u32 target_pc;
- void *jump; // insn address
-// struct block_link_ *next;
-};
-
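/* Illustrative sketch, not part of this patch: each block_link now lives on
 * exactly one of two singly linked lists - the target entry's ->links list
 * once that entry exists, or the per-tcache unresolved_links list until it
 * does. Attaching a link to an entry is a constant-time head insertion; the
 * helper name below is hypothetical, dr_prepare_ext_branch() and
 * dr_link_blocks() do the equivalent splicing inline. */
static void attach_link_to_entry(struct block_entry *be, struct block_link *bl)
{
  bl->next = be->links;
  be->links = bl;
}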
static const int block_max_counts[TCACHE_BUFFERS] = {
4*1024,
256,
static struct block_desc *block_tables[TCACHE_BUFFERS];
static int block_counts[TCACHE_BUFFERS];
-static const int block_link_max_counts[TCACHE_BUFFERS] = {
+// block_link_pool is preallocated per tcache to avoid mallocs for individual links
+static const int block_link_pool_max_counts[TCACHE_BUFFERS] = {
4*1024,
256,
256,
};
-static struct block_link *block_links[TCACHE_BUFFERS];
-static int block_link_counts[TCACHE_BUFFERS];
+static struct block_link *block_link_pool[TCACHE_BUFFERS];
+static int block_link_pool_counts[TCACHE_BUFFERS];
+static struct block_link *unresolved_links[TCACHE_BUFFERS];
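/* Illustrative sketch, not part of this patch: taking a link from the
 * per-tcache pool, mirroring what dr_prepare_ext_branch() does inline further
 * down (the helper name is hypothetical). Entries with target_pc == 0 count
 * as freed, so a freed tail of the pool can be reclaimed without a free list;
 * on overflow the caller gives up, sh2_translate() fails and the region's
 * tcache is invalidated, as described in the notes at the top of the file. */
static struct block_link *take_block_link(int tcache_id)
{
  struct block_link *bl = block_link_pool[tcache_id];
  int cnt = block_link_pool_counts[tcache_id];
  int i;

  // skip freed entries at the tail of the pool
  for (i = cnt - 1; i >= 0; i--)
    if (bl[i].target_pc != 0)
      break;
  cnt = i + 1;

  if (cnt >= block_link_pool_max_counts[tcache_id])
    return NULL; // pool overflow

  block_link_pool_counts[tcache_id]++;
  return &bl[cnt];
}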
// used for invalidation
static const int ram_sizes[TCACHE_BUFFERS] = {
static u32 REGPARM(2) (*sh2_drc_read16)(u32 a, SH2 *sh2);
static u32 REGPARM(2) (*sh2_drc_read32)(u32 a, SH2 *sh2);
static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d);
-static void REGPARM(2) (*sh2_drc_write8_slot)(u32 a, u32 d);
static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d);
-static void REGPARM(2) (*sh2_drc_write16_slot)(u32 a, u32 d);
static int REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2);
// address space stuff
block_counts[tcid], block_max_counts[tcid]);
block_counts[tcid] = 0;
- block_link_counts[tcid] = 0;
+ block_link_pool_counts[tcid] = 0;
+ unresolved_links[tcid] = NULL;
memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * hash_table_sizes[tcid]);
tcache_ptrs[tcid] = tcache_bases[tcid];
if (Pico32xMem != NULL) {
rm_block_list(&inval_lookup[tcid][i]);
}
-#if LINK_BRANCHES
-// add block links (tracked branches)
-static int dr_add_block_link(u32 target_pc, void *jump, int tcache_id)
-{
- struct block_link *bl = block_links[tcache_id];
- int cnt = block_link_counts[tcache_id];
-
- if (cnt >= block_link_max_counts[tcache_id]) {
- dbg(1, "bl overflow for tcache %d\n", tcache_id);
- return -1;
- }
-
- bl[cnt].target_pc = target_pc;
- bl[cnt].jump = jump;
- block_link_counts[tcache_id]++;
-
- return 0;
-}
-#endif
-
static void add_to_hashlist(struct block_entry *be, int tcache_id)
{
u32 tcmask = hash_table_sizes[tcache_id] - 1;
bd->entry_count = 1;
bd->entryp[0].pc = addr;
bd->entryp[0].tcache_ptr = tcache_ptr;
+ bd->entryp[0].links = NULL;
#if (DRC_DEBUG & 2)
bd->entryp[0].block = bd;
bd->refcount = 0;
exit(1);
}
-static void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id)
+static void *dr_prepare_ext_branch(u32 pc, int is_slave, int tcache_id)
{
#if LINK_BRANCHES
+ struct block_link *bl = block_link_pool[tcache_id];
+ int cnt = block_link_pool_counts[tcache_id];
+ struct block_entry *be = NULL;
int target_tcache_id;
- void *target;
- int ret;
-
- target = dr_lookup_block(pc, sh2->is_slave, &target_tcache_id);
- if (target_tcache_id == tcache_id) {
- // allow linking blocks only from local cache
- ret = dr_add_block_link(pc, tcache_ptr, tcache_id);
- if (ret < 0)
- return NULL;
+ int i;
+
+ be = dr_get_entry(pc, is_slave, &target_tcache_id);
+ if (target_tcache_id != tcache_id)
+ return sh2_drc_dispatcher;
+
+ // if entries at the tail of the pool have been freed (target_pc == 0), reuse them
+ for (i = cnt - 1; i >= 0; i--)
+ if (bl[i].target_pc != 0)
+ break;
+ cnt = i + 1;
+ if (cnt >= block_link_pool_max_counts[tcache_id]) {
+ dbg(1, "bl overflow for tcache %d\n", tcache_id);
+ return NULL;
}
- if (target == NULL || target_tcache_id != tcache_id)
- target = sh2_drc_dispatcher;
+ bl += cnt;
+ block_link_pool_counts[tcache_id]++;
+
+ bl->target_pc = pc;
+ bl->jump = tcache_ptr;
- return target;
+ if (be != NULL) {
+ dbg(2, "- early link from %p to pc %08x", bl->jump, pc);
+ bl->next = be->links;
+ be->links = bl;
+ return be->tcache_ptr;
+ }
+ else {
+ bl->next = unresolved_links[tcache_id];
+ unresolved_links[tcache_id] = bl;
+ return sh2_drc_dispatcher;
+ }
#else
return sh2_drc_dispatcher;
#endif
}
-static void dr_link_blocks(void *target, u32 pc, int tcache_id)
+static void dr_link_blocks(struct block_entry *be, int tcache_id)
{
-#if 0 // FIXME: invalidated blocks must not be in block_links
-//LINK_BRANCHES
- struct block_link *bl = block_links[tcache_id];
- int cnt = block_link_counts[tcache_id];
- int i;
-
- for (i = 0; i < cnt; i++) {
- if (bl[i].target_pc == pc) {
- dbg(2, "- link from %p", bl[i].jump);
- emith_jump_patch(bl[i].jump, target);
- // XXX: sync ARM caches (old jump should be fine)?
+#if LINK_BRANCHES
+ struct block_link *first = unresolved_links[tcache_id];
+ struct block_link *bl, *prev, *tmp;
+ u32 pc = be->pc;
+
+ for (bl = prev = first; bl != NULL; ) {
+ if (bl->target_pc == pc) {
+ dbg(2, "- link from %p to pc %08x", bl->jump, pc);
+ emith_jump_patch(bl->jump, tcache_ptr);
+
+ // move bl from unresolved_links to block_entry
+ tmp = bl->next;
+ bl->next = be->links;
+ be->links = bl;
+
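+ // unlink bl from the unresolved list: advance the list head if bl was its first node, otherwise splice prev past it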
+ if (bl == first)
+ first = prev = bl = tmp;
+ else
+ prev->next = bl = tmp;
+ continue;
}
+ prev = bl;
+ bl = bl->next;
}
+ unresolved_links[tcache_id] = first;
+
+ // could sync arm caches here, but that's unnecessary
#endif
}
return hr2;
}
-static void emit_memhandler_write(int size, u32 pc, int delay)
+static void emit_memhandler_write(int size, u32 pc)
{
int ctxr;
host_arg2reg(ctxr, 2);
switch (size) {
case 0: // 8
// XXX: consider inlining sh2_drc_write8
- if (delay) {
- emith_call(sh2_drc_write8_slot);
- } else {
- emit_move_r_imm32(SHR_PC, pc);
- rcache_clean();
- emith_call(sh2_drc_write8);
- }
+ rcache_clean();
+ emith_call(sh2_drc_write8);
break;
case 1: // 16
- if (delay) {
- emith_call(sh2_drc_write16_slot);
- } else {
- emit_move_r_imm32(SHR_PC, pc);
- rcache_clean();
- emith_call(sh2_drc_write16);
- }
+ rcache_clean();
+ emith_call(sh2_drc_write16);
break;
case 2: // 32
emith_move_r_r(ctxr, CONTEXT_REG);
break;
}
+ rcache_invalidate();
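+ // the write handler may have modified the sh2 context, so drop cached values before refreshing the statically mapped SR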
if (reg_map_g2h[SHR_SR] != -1)
emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4);
- rcache_invalidate();
}
// @(Rx,Ry)
dbg(2, "== %csh2 block #%d,%d %08x-%08x -> %p", sh2->is_slave ? 's' : 'm',
tcache_id, blkid_main, base_pc, end_pc, block_entry_ptr);
- dr_link_blocks(tcache_ptr, base_pc, tcache_id);
+ dr_link_blocks(&block->entryp[0], tcache_id);
// collect branch_targets that don't land on delay slots
for (pc = base_pc; pc < end_pc; pc += 2) {
if (v < ARRAY_SIZE(block->entryp)) {
block->entryp[v].pc = pc;
block->entryp[v].tcache_ptr = tcache_ptr;
+ block->entryp[v].links = NULL;
#if (DRC_DEBUG & 2)
block->entryp[v].block = block;
#endif
dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", sh2->is_slave ? 's' : 'm',
tcache_id, blkid_main, pc, tcache_ptr);
- // since we made a block entry, link any other blocks that jump to current pc
- dr_link_blocks(tcache_ptr, pc, tcache_id);
+ // since we made a block entry, link any other blocks
+ // that jump to current pc
+ dr_link_blocks(&block->entryp[v], tcache_id);
}
else {
dbg(1, "too many entryp for block #%d,%d pc=%08x",
tmp2 = rcache_get_reg_arg(0, SHR_R0);
tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ);
emith_add_r_r(tmp2, tmp3);
- emit_memhandler_write(op & 3, pc, drcf.delayed_op);
+ emit_memhandler_write(op & 3, pc);
goto end_op;
case 0x07:
// MUL.L Rm,Rn 0000nnnnmmmm0111
tmp2 = rcache_get_reg_arg(1, GET_Rm());
if (op & 0x0f)
emith_add_r_imm(tmp, (op & 0x0f) * 4);
- emit_memhandler_write(2, pc, drcf.delayed_op);
+ emit_memhandler_write(2, pc);
goto end_op;
case 0x02:
rcache_clean();
rcache_get_reg_arg(0, GET_Rn());
rcache_get_reg_arg(1, GET_Rm());
- emit_memhandler_write(op & 3, pc, drcf.delayed_op);
+ emit_memhandler_write(op & 3, pc);
goto end_op;
case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100
case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101
rcache_clean();
rcache_get_reg_arg(0, GET_Rn());
rcache_get_reg_arg(1, GET_Rm());
- emit_memhandler_write(op & 3, pc, drcf.delayed_op);
+ emit_memhandler_write(op & 3, pc);
goto end_op;
case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111
sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
tmp3 = rcache_get_reg_arg(1, tmp);
if (tmp == SHR_SR)
emith_clear_msb(tmp3, tmp3, 22); // reserved bits defined by ISA as 0
- emit_memhandler_write(2, pc, drcf.delayed_op);
+ emit_memhandler_write(2, pc);
goto end_op;
case 0x04:
case 0x05:
emith_move_r_r(tmp2, tmp);
rcache_free_tmp(tmp);
rcache_get_reg_arg(0, GET_Rn());
- emit_memhandler_write(0, pc, drcf.delayed_op);
- cycles += 3;
+ emit_memhandler_write(0, pc);
break;
default:
goto default_;
tmp3 = (op & 0x100) >> 8;
if (op & 0x0f)
emith_add_r_imm(tmp, (op & 0x0f) << tmp3);
- emit_memhandler_write(tmp3, pc, drcf.delayed_op);
+ emit_memhandler_write(tmp3, pc);
goto end_op;
case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd
case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd
tmp2 = rcache_get_reg_arg(1, SHR_R0);
tmp3 = (op & 0x300) >> 8;
emith_add_r_imm(tmp, (op & 0xff) << tmp3);
- emit_memhandler_write(tmp3, pc, drcf.delayed_op);
+ emit_memhandler_write(tmp3, pc);
goto end_op;
case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd
case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd
emith_add_r_imm(tmp, 4);
tmp = rcache_get_reg_arg(1, SHR_SR);
emith_clear_msb(tmp, tmp, 22);
- emit_memhandler_write(2, pc, drcf.delayed_op);
+ emit_memhandler_write(2, pc);
// push PC
rcache_get_reg_arg(0, SHR_SP);
tmp = rcache_get_tmp_arg(1);
emith_move_r_imm(tmp, pc);
- emit_memhandler_write(2, pc, drcf.delayed_op);
+ emit_memhandler_write(2, pc);
// obtain new PC
emit_memhandler_read_rr(SHR_PC, SHR_VBR, (op & 0xff) * 4, 2);
out_pc = (u32)-1;
tmp3 = rcache_get_reg_arg(0, SHR_GBR);
tmp4 = rcache_get_reg(SHR_R0, RC_GR_READ);
emith_add_r_r(tmp3, tmp4);
- emit_memhandler_write(0, pc, drcf.delayed_op);
- cycles += 2;
+ emit_memhandler_write(0, pc);
goto end_op;
}
goto default_;
emit_move_r_imm32(SHR_PC, target_pc);
rcache_clean();
- target = dr_prepare_ext_branch(target_pc, sh2, tcache_id);
+ target = dr_prepare_ext_branch(target_pc, sh2->is_slave, tcache_id);
if (target == NULL)
return NULL;
emith_jump_cond_patchable(pending_branch_cond, target);
emit_move_r_imm32(SHR_PC, out_pc);
rcache_flush();
- target = dr_prepare_ext_branch(out_pc, sh2, tcache_id);
+ target = dr_prepare_ext_branch(out_pc, sh2->is_slave, tcache_id);
if (target == NULL)
return NULL;
emith_jump_patchable(target);
static void sh2_generate_utils(void)
{
int arg0, arg1, arg2, sr, tmp;
- void *sh2_drc_write_end, *sh2_drc_write_slot_end;
sh2_drc_write32 = p32x_sh2_write32;
sh2_drc_read8 = p32x_sh2_read8;
emith_call(sh2_drc_test_irq);
emith_jump(sh2_drc_dispatcher);
- // write-caused irq detection
- sh2_drc_write_end = tcache_ptr;
- emith_tst_r_r(arg0, arg0);
- EMITH_SJMP_START(DCOND_NE);
- emith_jump_ctx_c(DCOND_EQ, offsetof(SH2, drc_tmp)); // return
- EMITH_SJMP_END(DCOND_NE);
- emith_call(sh2_drc_test_irq);
- emith_jump_ctx(offsetof(SH2, drc_tmp));
-
- // write-caused irq detection for writes in delay slot
- sh2_drc_write_slot_end = tcache_ptr;
- emith_tst_r_r(arg0, arg0);
- EMITH_SJMP_START(DCOND_NE);
- emith_jump_ctx_c(DCOND_EQ, offsetof(SH2, drc_tmp));
- EMITH_SJMP_END(DCOND_NE);
- // just burn cycles to get back to dispatcher after branch is handled
- sr = rcache_get_reg(SHR_SR, RC_GR_RMW);
- emith_ctx_write(sr, offsetof(SH2, irq_cycles));
- emith_clear_msb(sr, sr, 20); // clear cycles
- rcache_flush();
- emith_jump_ctx(offsetof(SH2, drc_tmp));
-
// sh2_drc_write8(u32 a, u32 d)
sh2_drc_write8 = (void *)tcache_ptr;
- emith_ret_to_ctx(offsetof(SH2, drc_tmp));
emith_ctx_read(arg2, offsetof(SH2, write8_tab));
- emith_sh2_wcall(arg0, arg2, sh2_drc_write_end);
+ emith_sh2_wcall(arg0, arg2);
// sh2_drc_write16(u32 a, u32 d)
sh2_drc_write16 = (void *)tcache_ptr;
- emith_ret_to_ctx(offsetof(SH2, drc_tmp));
- emith_ctx_read(arg2, offsetof(SH2, write16_tab));
- emith_sh2_wcall(arg0, arg2, sh2_drc_write_end);
-
- // sh2_drc_write8_slot(u32 a, u32 d)
- sh2_drc_write8_slot = (void *)tcache_ptr;
- emith_ret_to_ctx(offsetof(SH2, drc_tmp));
- emith_ctx_read(arg2, offsetof(SH2, write8_tab));
- emith_sh2_wcall(arg0, arg2, sh2_drc_write_slot_end);
-
- // sh2_drc_write16_slot(u32 a, u32 d)
- sh2_drc_write16_slot = (void *)tcache_ptr;
- emith_ret_to_ctx(offsetof(SH2, drc_tmp));
emith_ctx_read(arg2, offsetof(SH2, write16_tab));
- emith_sh2_wcall(arg0, arg2, sh2_drc_write_slot_end);
+ emith_sh2_wcall(arg0, arg2);
#ifdef PDB_NET
// debug
MAKE_READ_WRAPPER(sh2_drc_read16);
MAKE_READ_WRAPPER(sh2_drc_read32);
MAKE_WRITE_WRAPPER(sh2_drc_write8);
- MAKE_WRITE_WRAPPER(sh2_drc_write8_slot);
MAKE_WRITE_WRAPPER(sh2_drc_write16);
- MAKE_WRITE_WRAPPER(sh2_drc_write16_slot);
MAKE_WRITE_WRAPPER(sh2_drc_write32);
#if (DRC_DEBUG & 4)
host_dasm_new_symbol(sh2_drc_read8);
host_dasm_new_symbol(sh2_drc_dispatcher);
host_dasm_new_symbol(sh2_drc_exit);
host_dasm_new_symbol(sh2_drc_test_irq);
- host_dasm_new_symbol(sh2_drc_write_end);
- host_dasm_new_symbol(sh2_drc_write_slot_end);
host_dasm_new_symbol(sh2_drc_write8);
- host_dasm_new_symbol(sh2_drc_write8_slot);
host_dasm_new_symbol(sh2_drc_write16);
- host_dasm_new_symbol(sh2_drc_write16_slot);
#endif
}
static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram_mask)
{
+ struct block_link *bl, *bl_next, *bl_unresolved;
void *tmp;
u32 i, addr;
}
tmp = tcache_ptr;
+ bl_unresolved = unresolved_links[tcache_id];
- // remove from hash table
- // XXX: maybe kill links somehow instead?
+ // remove from hash table, make incoming links unresolved
+ // XXX: maybe patch branches w/flush instead?
for (i = 0; i < bd->entry_count; i++) {
rm_from_hashlist(&bd->entryp[i], tcache_id);
emith_jump(sh2_drc_dispatcher);
host_instructions_updated(bd->entryp[i].tcache_ptr, tcache_ptr);
+
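+ // return this entry's incoming links to the unresolved list; their jump sites still point at the dead entry, which now just jumps back to the dispatcher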
+ for (bl = bd->entryp[i].links; bl != NULL; ) {
+ bl_next = bl->next;
+ bl->next = bl_unresolved;
+ bl_unresolved = bl;
+ bl = bl_next;
+ }
}
tcache_ptr = tmp;
+ unresolved_links[tcache_id] = bl_unresolved;
bd->addr = bd->end_addr = 0;
bd->entry_count = 0;
if (block_tables[i] == NULL)
goto fail;
// max 2 block links (exits) per block
- block_links[i] = calloc(block_link_max_counts[i], sizeof(*block_links[0]));
- if (block_links[i] == NULL)
+ block_link_pool[i] = calloc(block_link_pool_max_counts[i],
+ sizeof(*block_link_pool[0]));
+ if (block_link_pool[i] == NULL)
goto fail;
inval_lookup[i] = calloc(ram_sizes[i] / ADDR_TO_BLOCK_PAGE,
goto fail;
}
memset(block_counts, 0, sizeof(block_counts));
- memset(block_link_counts, 0, sizeof(block_link_counts));
+ memset(block_link_pool_counts, 0, sizeof(block_link_pool_counts));
drc_cmn_init();
tcache_ptr = tcache;
if (block_tables[i] != NULL)
free(block_tables[i]);
block_tables[i] = NULL;
- if (block_links[i] == NULL)
- free(block_links[i]);
- block_links[i] = NULL;
+ if (block_link_pool[i] != NULL)
+ free(block_link_pool[i]);
+ block_link_pool[i] = NULL;
if (inval_lookup[i] == NULL)
free(inval_lookup[i]);