X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cpu%2Fsh2%2Fcompiler.c;h=6f08e7b8f2c9e577ddcea52abfa1ac21f6b68498;hb=fcdefcf62cb71969b456a6f27688adbeb5890bb9;hp=eea9a60c0e78aa0da1df4e712fa3396e7f6fd9c6;hpb=5686d93123821e06ac2d9f6209c37438303dd4c1;p=picodrive.git diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index eea9a60..6f08e7b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -6,6 +6,8 @@ * failure, followed by full tcache invalidation for that region * - jumps between blocks are tracked for SMC handling (in block_links[]), * except jumps between different tcaches + * - non-main block entries are called subblocks, as they have same tracking + * structures that main blocks have. * * implemented: * - static register allocation @@ -15,7 +17,6 @@ * - some constant propagation * * TODO: - * - proper SMC handling * - better constant propagation * - stack caching? * - bug fixing @@ -35,8 +36,14 @@ #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 +// limits (per block) +#define BLOCK_CYCLE_LIMIT 100 +#define MAX_BLOCK_SIZE (BLOCK_CYCLE_LIMIT * 6 * 6) + // max literal offset from the block end #define MAX_LITERAL_OFFSET 32*2 +#define MAX_LITERALS (BLOCK_CYCLE_LIMIT / 4) +#define MAX_LOCAL_BRANCHES 32 // debug stuff { #ifndef DRC_DEBUG @@ -59,7 +66,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define dbg(...) #endif -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; #define do_host_disasm(tcid) \ @@ -69,11 +76,11 @@ static char sh2dasm_buff[64]; #define do_host_disasm(x) #endif -#if (DRC_DEBUG & 4) || defined(PDB) +#if (DRC_DEBUG & 8) || defined(PDB) static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) { if (block != NULL) { - dbg(4, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm', + dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm', sh2->pc, block, (signed int)sr >> 12); pdb_step(sh2, sh2->pc); } @@ -82,8 +89,6 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) #endif // } debug -#define BLOCK_CYCLE_LIMIT 100 -#define MAX_BLOCK_SIZE (BLOCK_CYCLE_LIMIT * 6 * 6) #define TCACHE_BUFFERS 3 // we have 3 translation cache buffers, split from one drc/cmn buffer. @@ -104,17 +109,16 @@ static u8 *tcache_ptr; typedef struct block_desc_ { u32 addr; // SH2 PC address - u32 end_addr; // TODO rm? void *tcache_ptr; // translated block for above PC struct block_desc_ *next; // next block with the same PC hash -#if (DRC_DEBUG & 1) +#if (DRC_DEBUG & 2) int refcount; #endif } block_desc; typedef struct block_link_ { u32 target_pc; - void *jump; + void *jump; // insn address // struct block_link_ *next; } block_link; @@ -231,13 +235,108 @@ static int REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); extern void REGPARM(2) sh2_do_op(SH2 *sh2, int opcode); -static void flush_tcache(int tcid) +// address space stuff +static void *dr_get_pc_base(u32 pc, int is_slave) +{ + void *ret = NULL; + u32 mask = 0; + + if ((pc & ~0x7ff) == 0) { + // BIOS + ret = is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m; + mask = 0x7ff; + } + else if ((pc & 0xfffff000) == 0xc0000000) { + // data array + ret = Pico32xMem->data_array[is_slave]; + mask = 0xfff; + } + else if ((pc & 0xc6000000) == 0x06000000) { + // SDRAM + ret = Pico32xMem->sdram; + mask = 0x03ffff; + } + else if ((pc & 0xc6000000) == 0x02000000) { + // ROM + ret = Pico.rom; + mask = 0x3fffff; + } + + if (ret == NULL) + return (void *)-1; // NULL is valid value + + return (char *)ret - (pc & ~mask); +} + +static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) +{ + int poffs = -1; + + if ((a & ~0x7ff) == 0) { + // BIOS + poffs = offsetof(SH2, p_bios); + *mask = 0x7ff; + } + else if ((a & 0xfffff000) == 0xc0000000) { + // data array + poffs = offsetof(SH2, p_da); + *mask = 0xfff; + } + else if ((a & 0xc6000000) == 0x06000000) { + // SDRAM + poffs = offsetof(SH2, p_sdram); + *mask = 0x03ffff; + } + else if ((a & 0xc6000000) == 0x02000000) { + // ROM + poffs = offsetof(SH2, p_rom); + *mask = 0x3fffff; + } + + return poffs; +} + +static block_desc *dr_get_bd(u32 pc, int is_slave, int *tcache_id) +{ + *tcache_id = 0; + + // we have full block id tables for data_array and RAM + // BIOS goes to data_array table too + if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) { + int blkid = Pico32xMem->drcblk_da[is_slave][(pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT]; + *tcache_id = 1 + is_slave; + if (blkid & 1) + return &block_tables[*tcache_id][blkid >> 1]; + } + // RAM + else if ((pc & 0xc6000000) == 0x06000000) { + int blkid = Pico32xMem->drcblk_ram[(pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT]; + if (blkid & 1) + return &block_tables[0][blkid >> 1]; + } + // ROM + else if ((pc & 0xc6000000) == 0x02000000) { + block_desc *bd = HASH_FUNC(hash_table, pc); + + for (; bd != NULL; bd = bd->next) + if (bd->addr == pc) + return bd; + } + + return NULL; +} + +// --------------------------------------------------------------- + +// block management +static void REGPARM(1) flush_tcache(int tcid) { dbg(1, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tcache_ptrs[tcid] - tcache_bases[tcid], tcache_sizes[tcid], block_counts[tcid], block_max_counts[tcid]); block_counts[tcid] = 0; + block_link_counts[tcid] = 0; tcache_ptrs[tcid] = tcache_bases[tcid]; if (tcid == 0) { // ROM, RAM memset(hash_table, 0, sizeof(hash_table[0]) * MAX_HASH_ENTRIES); @@ -245,7 +344,7 @@ static void flush_tcache(int tcid) } else memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[0])); -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; #endif } @@ -258,7 +357,7 @@ static int dr_add_block_link(u32 target_pc, void *jump, int tcache_id) int cnt = block_link_counts[tcache_id]; if (cnt >= block_max_counts[tcache_id] * 2) { - printf("bl overflow for tcache %d\n", tcache_id); + dbg(1, "bl overflow for tcache %d\n", tcache_id); return -1; } @@ -270,28 +369,27 @@ static int dr_add_block_link(u32 target_pc, void *jump, int tcache_id) } #endif -static void *dr_find_block(block_desc *tab, u32 addr) +static block_desc *dr_add_block(u32 addr, int is_slave, int *blk_id) { - for (tab = tab->next; tab != NULL; tab = tab->next) - if (tab->addr == addr) - break; - - if (tab != NULL) - return tab->tcache_ptr; - - printf("block miss for %08x\n", addr); - return NULL; -} - -static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id) -{ - int *bcount = &block_counts[tcache_id]; block_desc *bd; + int tcache_id; + int *bcount; + + bd = dr_get_bd(addr, is_slave, &tcache_id); + if (bd != NULL) { + dbg(2, "block override for %08x", addr); + bd->tcache_ptr = tcache_ptr; + *blk_id = bd - block_tables[tcache_id]; + return bd; + } + bcount = &block_counts[tcache_id]; if (*bcount >= block_max_counts[tcache_id]) { - printf("bd overflow for tcache %d\n", tcache_id); + dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; } + if (*bcount == 0) + (*bcount)++; // not using descriptor 0 bd = &block_tables[tcache_id][*bcount]; bd->addr = addr; @@ -302,7 +400,7 @@ static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id) if ((addr & 0xc6000000) == 0x02000000) { // ROM bd->next = HASH_FUNC(hash_table, addr); HASH_FUNC(hash_table, addr) = bd; -#if (DRC_DEBUG & 1) +#if (DRC_DEBUG & 2) if (bd->next != NULL) { printf(" hash collision with %08x\n", bd->next->addr); hash_collisions++; @@ -313,14 +411,70 @@ static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id) return bd; } +static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id) +{ + block_desc *bd = NULL; + void *block = NULL; + + bd = dr_get_bd(pc, is_slave, tcache_id); + if (bd != NULL) + block = bd->tcache_ptr; + +#if (DRC_DEBUG & 2) + if (bd != NULL) + bd->refcount++; +#endif + return block; +} + +static void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) +{ +#if LINK_BRANCHES + int target_tcache_id; + void *target; + int ret; + + target = dr_lookup_block(pc, sh2->is_slave, &target_tcache_id); + if (target_tcache_id == tcache_id) { + // allow linking blocks only from local cache + ret = dr_add_block_link(pc, tcache_ptr, tcache_id); + if (ret < 0) + return NULL; + } + if (target == NULL || target_tcache_id != tcache_id) + target = sh2_drc_dispatcher; + + return target; +#else + return sh2_drc_dispatcher; +#endif +} + +static void dr_link_blocks(void *target, u32 pc, int tcache_id) +{ +#if LINK_BRANCHES + block_link *bl = block_links[tcache_id]; + int cnt = block_link_counts[tcache_id]; + int i; + + for (i = 0; i < cnt; i++) { + if (bl[i].target_pc == pc) { + dbg(2, "- link from %p", bl[i].jump); + emith_jump_patch(bl[i].jump, target); + // XXX: sync ARM caches (old jump should be fine)? + } + } +#endif +} + #define ADD_TO_ARRAY(array, count, item, failcode) \ array[count++] = item; \ if (count >= ARRAY_SIZE(array)) { \ - printf("warning: " #array " overflow\n"); \ + dbg(1, "warning: " #array " overflow"); \ failcode; \ } -int find_in_array(u32 *array, size_t size, u32 what) +static int find_in_array(u32 *array, size_t size, u32 what) { size_t i; for (i = 0; i < size; i++) @@ -332,6 +486,7 @@ int find_in_array(u32 *array, size_t size, u32 what) // --------------------------------------------------------------- +// register cache / constant propagation stuff typedef enum { RC_GR_READ, RC_GR_WRITE, @@ -346,9 +501,9 @@ static u32 dr_gcregs[24]; static u32 dr_gcregs_mask; static u32 dr_gcregs_dirty; +#if PROPAGATE_CONSTANTS static void gconst_new(sh2_reg_e r, u32 val) { -#if PROPAGATE_CONSTANTS int i; dr_gcregs_mask |= 1 << r; @@ -363,8 +518,8 @@ static void gconst_new(sh2_reg_e r, u32 val) reg_temp[i].flags = 0; } } -#endif } +#endif static int gconst_get(sh2_reg_e r, u32 *val) { @@ -423,7 +578,6 @@ static void gconst_invalidate(void) dr_gcregs_mask = dr_gcregs_dirty = 0; } -// register chache static u16 rcache_counter; static temp_reg_t *rcache_evict(void) @@ -561,9 +715,8 @@ static int rcache_get_arg_id(int arg) if (reg_temp[i].hreg == r) break; - if (i == ARRAY_SIZE(reg_temp)) - // let's just say it's untracked arg reg - return r; + if (i == ARRAY_SIZE(reg_temp)) // can't happen + exit(1); if (reg_temp[i].type == HR_CACHED) { // writeback @@ -595,7 +748,7 @@ static int rcache_get_tmp_arg(int arg) static int rcache_get_reg_arg(int arg, sh2_reg_e r) { int i, srcr, dstr, dstid; - int dirty = 0; + int dirty = 0, src_dirty = 0; dstid = rcache_get_arg_id(arg); dstr = reg_temp[dstid].hreg; @@ -611,6 +764,8 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r) reg_temp[i].greg == r) { srcr = reg_temp[i].hreg; + if (reg_temp[i].flags & HRF_DIRTY) + src_dirty = 1; goto do_cache; } } @@ -627,13 +782,22 @@ static int rcache_get_reg_arg(int arg, sh2_reg_e r) do_cache: if (dstr != srcr) emith_move_r_r(dstr, srcr); +#if 1 + else + dirty |= src_dirty; + + if (dirty) + // must clean, callers might want to modify the arg before call + emith_ctx_write(dstr, r * 4); +#else + if (dirty) + reg_temp[dstid].flags |= HRF_DIRTY; +#endif reg_temp[dstid].stamp = ++rcache_counter; reg_temp[dstid].type = HR_CACHED; reg_temp[dstid].greg = r; reg_temp[dstid].flags |= HRF_LOCKED; - if (dirty) - reg_temp[dstid].flags |= HRF_DIRTY; return dstr; } @@ -701,70 +865,17 @@ static void rcache_flush(void) // --------------------------------------------------------------- -// address space stuff -static void *dr_get_pc_base(u32 pc, int is_slave) -{ - void *ret = NULL; - u32 mask = 0; - - if ((pc & ~0x7ff) == 0) { - // BIOS - ret = is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m; - mask = 0x7ff; - } - else if ((pc & 0xfffff000) == 0xc0000000) { - // data array - ret = Pico32xMem->data_array[is_slave]; - mask = 0xfff; - } - else if ((pc & 0xc6000000) == 0x06000000) { - // SDRAM - ret = Pico32xMem->sdram; - mask = 0x03ffff; - } - else if ((pc & 0xc6000000) == 0x02000000) { - // ROM - ret = Pico.rom; - mask = 0x3fffff; - } - - if (ret == NULL) - return (void *)-1; // NULL is valid value - - return (char *)ret - (pc & ~mask); -} - static int emit_get_rbase_and_offs(u32 a, u32 *offs) { - int poffs = -1; u32 mask = 0; + int poffs; int hr; - if ((a & ~0x7ff) == 0) { - // BIOS - poffs = offsetof(SH2, p_bios); - mask = 0x7ff; - } - else if ((a & 0xfffff000) == 0xc0000000) { - // data array - poffs = offsetof(SH2, p_da); - mask = 0xfff; - } - else if ((a & 0xc6000000) == 0x06000000) { - // SDRAM - poffs = offsetof(SH2, p_sdram); - mask = 0x03ffff; - } - else if ((a & 0xc6000000) == 0x02000000) { - // ROM - poffs = offsetof(SH2, p_rom); - mask = 0x3fffff; - } - + poffs = dr_ctx_get_mem_ptr(a, &mask); if (poffs == -1) return -1; - // XXX: could use related reg + // XXX: could use some related reg hr = rcache_get_tmp(); emith_ctx_read(hr, poffs); emith_add_r_imm(hr, a & mask & ~0xff); @@ -772,49 +883,6 @@ static int emit_get_rbase_and_offs(u32 a, u32 *offs) return hr; } -static void REGPARM(3) *lookup_block(u32 pc, int is_slave, int *tcache_id) -{ - block_desc *bd = NULL; - void *block = NULL; - *tcache_id = 0; - - // we have full block id tables for data_array and RAM - // BIOS goes to data_array table too - if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) { - int blkid = Pico32xMem->drcblk_da[is_slave][(pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT]; - *tcache_id = 1 + is_slave; - if (blkid & 1) { - bd = &block_tables[*tcache_id][blkid >> 1]; - block = bd->tcache_ptr; - } - } - // RAM - else if ((pc & 0xc6000000) == 0x06000000) { - int blkid = Pico32xMem->drcblk_ram[(pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT]; - if (blkid & 1) { - bd = &block_tables[0][blkid >> 1]; - block = bd->tcache_ptr; - } - } - // ROM - else if ((pc & 0xc6000000) == 0x02000000) { - bd = HASH_FUNC(hash_table, pc); - - if (bd != NULL) { - if (bd->addr == pc) - block = bd->tcache_ptr; - else - block = dr_find_block(bd, pc); - } - } - -#if (DRC_DEBUG & 1) - if (bd != NULL) - bd->refcount++; -#endif - return block; -} - static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) { #if PROPAGATE_CONSTANTS @@ -1060,7 +1128,7 @@ static void emit_block_entry(void) host_arg2reg(arg1, 1); host_arg2reg(arg2, 2); -#if (DRC_DEBUG & 4) || defined(PDB) +#if (DRC_DEBUG & 8) || defined(PDB) emit_do_static_regs(1, arg2); emith_move_r_r(arg1, CONTEXT_REG); emith_move_r_r(arg2, rcache_get_reg(SHR_SR, RC_GR_READ)); @@ -1073,46 +1141,6 @@ static void emit_block_entry(void) EMITH_SJMP_END(DCOND_EQ); } -void dr_link_blocks(void *target, u32 pc, int tcache_id) -{ -#if LINK_BRANCHES - block_link *bl = block_links[tcache_id]; - int cnt = block_link_counts[tcache_id]; - int i; - - for (i = 0; i < cnt; i++) { - if (bl[i].target_pc == pc) { - dbg(1, "- link from %p", bl[i].jump); - emith_jump_patch(bl[i].jump, target); - // XXX: sync ARM caches (old jump should be fine)? - } - } -#endif -} - -void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) -{ -#if LINK_BRANCHES - int target_tcache_id; - void *target; - int ret; - - target = lookup_block(pc, sh2->is_slave, &target_tcache_id); - if (target_tcache_id == tcache_id) { - // allow linking blocks only from local cache - ret = dr_add_block_link(pc, tcache_ptr, tcache_id); - if (ret < 0) - return NULL; - } - if (target == NULL || target_tcache_id != tcache_id) - target = sh2_drc_dispatcher; - - return target; -#else - return sh2_drc_dispatcher; -#endif -} - #define DELAYED_OP \ drcf.delayed_op = 2 @@ -1154,8 +1182,6 @@ void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) if (GET_Fx() >= n) \ goto default_ -#define MAX_LOCAL_BRANCHES 32 - // op_flags: data from 1st pass #define OP_FLAGS(pc) op_flags[((pc) - base_pc) / 2] #define OF_DELAY_OP (1 << 0) @@ -1163,12 +1189,15 @@ void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // XXX: maybe use structs instead? - void *branch_target_ptr[MAX_LOCAL_BRANCHES]; u32 branch_target_pc[MAX_LOCAL_BRANCHES]; + void *branch_target_ptr[MAX_LOCAL_BRANCHES]; + int branch_target_blkid[MAX_LOCAL_BRANCHES]; int branch_target_count = 0; void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; int branch_patch_count = 0; + u32 literal_addr[MAX_LITERALS]; + int literal_addr_count = 0; int pending_branch_cond = -1; int pending_branch_pc = 0; u8 op_flags[BLOCK_CYCLE_LIMIT + 1]; @@ -1180,7 +1209,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // PC of current, first, last, last_target_blk SH2 insn u32 pc, base_pc, end_pc, out_pc; - u32 last_inlined_literal = 0; void *block_entry; block_desc *this_block; u16 *dr_pc_base; @@ -1202,19 +1230,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tcache_ptr = tcache_ptrs[tcache_id]; - this_block = dr_add_block(base_pc, tcache_id, &blkid_main); + this_block = dr_add_block(base_pc, sh2->is_slave, &blkid_main); if (this_block == NULL) return NULL; // predict tcache overflow tmp = tcache_ptr - tcache_bases[tcache_id]; if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE) { - printf("tcache %d overflow\n", tcache_id); + dbg(1, "tcache %d overflow", tcache_id); return NULL; } block_entry = tcache_ptr; - dbg(1, "== %csh2 block #%d,%d %08x -> %p", sh2->is_slave ? 's' : 'm', + dbg(2, "== %csh2 block #%d,%d %08x -> %p", sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, base_pc, block_entry); dr_link_blocks(tcache_ptr, base_pc, tcache_id); @@ -1267,6 +1295,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } branch_target_count = tmp; memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); + memset(branch_target_blkid, 0, sizeof(branch_target_blkid[0]) * branch_target_count); // ------------------------------------------------- // 2nd pass: actual compilation @@ -1282,14 +1311,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) op = FETCH_OP(pc); i = find_in_array(branch_target_pc, branch_target_count, pc); - if (i >= 0) + if (i >= 0 || pc == base_pc) { - if (pc != sh2->pc) + if (pc != base_pc) { /* make "subblock" - just a mid-block entry */ block_desc *subblock; - u16 *drcblk; - int blkid; sr = rcache_get_reg(SHR_SR, RC_GR_RMW); FLUSH_CYCLES(sr); @@ -1300,28 +1327,18 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_flush(); do_host_disasm(tcache_id); - subblock = dr_add_block(pc, tcache_id, &blkid); + dbg(2, "-- %csh2 subblock #%d,%d %08x -> %p", sh2->is_slave ? 's' : 'm', + tcache_id, branch_target_blkid[i], pc, tcache_ptr); + + subblock = dr_add_block(pc, sh2->is_slave, &branch_target_blkid[i]); if (subblock == NULL) return NULL; - subblock->end_addr = pc; - - if (tcache_id != 0) { // data array, BIOS - drcblk = Pico32xMem->drcblk_da[sh2->is_slave]; - drcblk += (pc & 0x00fff) >> SH2_DRCBLK_DA_SHIFT; - *drcblk = (blkid << 1) | 1; - } else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM - drcblk = Pico32xMem->drcblk_ram; - drcblk += (pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT; - *drcblk = (blkid << 1) | 1; - } - - dbg(1, "-- %csh2 subblock #%d,%d %08x -> %p", sh2->is_slave ? 's' : 'm', - tcache_id, blkid, pc, tcache_ptr); // since we made a block entry, link any other blocks that jump to current pc dr_link_blocks(tcache_ptr, pc, tcache_id); } - branch_target_ptr[i] = tcache_ptr; + if (i >= 0) + branch_target_ptr[i] = tcache_ptr; // must update PC emit_move_r_imm32(SHR_PC, pc); @@ -1332,11 +1349,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_cmp_r_imm(sr, 0); emith_jump_cond(DCOND_LE, sh2_drc_exit); do_host_disasm(tcache_id); + rcache_unlock_all(); } -#if (DRC_DEBUG & 3) - insns_compiled++; #if (DRC_DEBUG & 2) + insns_compiled++; +#if (DRC_DEBUG & 4) DasmSH2(sh2dasm_buff, pc, op); printf("%08x %04x %s\n", pc, op, sh2dasm_buff); #endif @@ -1848,7 +1866,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // XXX: limit burned cycles emit_move_r_imm32(GET_Rn(), 0); emith_or_r_imm(sr, T); - cycles += tmp * 4; + cycles += tmp * 4 + 1; // +1 syncs with noconst version, not sure why skip_op = 1; } else @@ -2289,10 +2307,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // MOV.W @(disp,PC),Rn 1001nnnndddddddd tmp = pc + (op & 0xff) * 2 + 2; #if PROPAGATE_CONSTANTS - if (tmp < end_pc + MAX_LITERAL_OFFSET) { + if (tmp < end_pc + MAX_LITERAL_OFFSET && literal_addr_count < MAX_LITERALS) { + ADD_TO_ARRAY(literal_addr, literal_addr_count, tmp,); gconst_new(GET_Rn(), (u32)(int)(signed short)FETCH_OP(tmp)); - if (last_inlined_literal < tmp) - last_inlined_literal = tmp; } else #endif @@ -2431,10 +2448,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // MOV.L @(disp,PC),Rn 1101nnnndddddddd tmp = (pc + (op & 0xff) * 4 + 2) & ~3; #if PROPAGATE_CONSTANTS - if (tmp < end_pc + MAX_LITERAL_OFFSET) { + if (tmp < end_pc + MAX_LITERAL_OFFSET && literal_addr_count < MAX_LITERALS) { + ADD_TO_ARRAY(literal_addr, literal_addr_count, tmp,); gconst_new(GET_Rn(), FETCH32(tmp)); - if (last_inlined_literal < tmp) - last_inlined_literal = tmp; } else #endif @@ -2496,7 +2512,7 @@ end_op: branch_patch_count++; if (branch_patch_count == MAX_LOCAL_BRANCHES) { - printf("warning: too many local branches\n"); + dbg(1, "warning: too many local branches"); break; } } @@ -2562,8 +2578,8 @@ end_op: t = find_in_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); target = branch_target_ptr[t]; if (target == NULL) { - // flush pc and go back to dispatcher (should no longer happen) - printf("stray branch to %08x %p\n", branch_patch_pc[i], tcache_ptr); + // flush pc and go back to dispatcher (this should no longer happen) + dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr); target = tcache_ptr; emit_move_r_imm32(SHR_PC, branch_patch_pc[i]); rcache_flush(); @@ -2572,47 +2588,58 @@ end_op: emith_jump_patch(branch_patch_ptr[i], target); } - this_block->end_addr = pc; - if (last_inlined_literal > pc) - this_block->end_addr = last_inlined_literal + 4; + end_pc = pc; // mark memory blocks as containing compiled code - if (tcache_id != 0) { - // data array, BIOS - u16 *drcblk = Pico32xMem->drcblk_da[sh2->is_slave]; - tmp = (this_block->addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT; - tmp2 = (this_block->end_addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT; - drcblk[tmp] = (blkid_main << 1) | 1; - for (++tmp; tmp < tmp2; tmp++) { - if (drcblk[tmp]) - continue; // dont overwrite overlay block(s) - drcblk[tmp] = blkid_main << 1; + // override any overlay blocks as they become unreachable anyway + if (tcache_id != 0 || (this_block->addr & 0xc7fc0000) == 0x06000000) + { + u16 *drc_ram_blk = NULL; + u32 mask = 0, shift = 0; + + if (tcache_id != 0) { + // data array, BIOS + drc_ram_blk = Pico32xMem->drcblk_da[sh2->is_slave]; + shift = SH2_DRCBLK_DA_SHIFT; + mask = 0xfff; } - } - else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM - tmp = (this_block->addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT; - tmp2 = (this_block->end_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT; - Pico32xMem->drcblk_ram[tmp] = (blkid_main << 1) | 1; - for (++tmp; tmp < tmp2; tmp++) { - if (Pico32xMem->drcblk_ram[tmp]) - continue; - Pico32xMem->drcblk_ram[tmp] = blkid_main << 1; + else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { + // SDRAM + drc_ram_blk = Pico32xMem->drcblk_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + mask = 0x3ffff; + } + + drc_ram_blk[(base_pc >> shift) & mask] = (blkid_main << 1) | 1; + for (pc = base_pc + 2; pc < end_pc; pc += 2) + drc_ram_blk[(pc >> shift) & mask] = blkid_main << 1; + + // mark subblocks + for (i = 0; i < branch_target_count; i++) + if (branch_target_blkid[i] != 0) + drc_ram_blk[(branch_target_pc[i] >> shift) & mask] = + (branch_target_blkid[i] << 1) | 1; + + // mark literals + for (i = 0; i < literal_addr_count; i++) { + tmp = literal_addr[i]; + drc_ram_blk[(tmp >> shift) & mask] = blkid_main << 1; + if (!(tmp & 3)) // assume long + drc_ram_blk[((tmp + 2) >> shift) & mask] = blkid_main << 1; } } tcache_ptrs[tcache_id] = tcache_ptr; -#ifdef ARM - cache_flush_d_inval_i(block_entry, tcache_ptr); -#endif + host_instructions_updated(block_entry, tcache_ptr); do_host_disasm(tcache_id); - dbg(1, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f", + dbg(2, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f", tcache_id, block_counts[tcache_id], tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id], insns_compiled, host_insn_count, (double)host_insn_count / insns_compiled); if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM - dbg(1, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]); + dbg(2, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]); /* printf("~~~\n"); tcache_dsm_ptrs[tcache_id] = block_entry; @@ -2620,7 +2647,7 @@ end_op: printf("~~~\n"); */ -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) fflush(stdout); #endif @@ -2656,7 +2683,7 @@ static void sh2_generate_utils(void) emith_ctx_read(arg0, SHR_PC * 4); emith_ctx_read(arg1, offsetof(SH2, is_slave)); emith_add_r_r_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); - emith_call(lookup_block); + emith_call(dr_lookup_block); emit_block_entry(); // lookup failed, call sh2_translate() emith_move_r_r(arg0, CONTEXT_REG); @@ -2781,7 +2808,7 @@ static void sh2_generate_utils(void) // debug #define MAKE_READ_WRAPPER(func) { \ void *tmp = (void *)tcache_ptr; \ - emith_ret_to_ctx(offsetof(SH2, drc_tmp)); \ + emith_push_ret(); \ emith_call(func); \ emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0])); \ emith_addf_r_r(arg2, arg0); \ @@ -2789,7 +2816,7 @@ static void sh2_generate_utils(void) emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_adc_r_imm(arg2, 0x01000000); \ emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ - emith_jump_ctx(offsetof(SH2, drc_tmp)); \ + emith_pop_and_ret(); \ func = tmp; \ } #define MAKE_WRITE_WRAPPER(func) { \ @@ -2813,7 +2840,7 @@ static void sh2_generate_utils(void) MAKE_WRITE_WRAPPER(sh2_drc_write16); MAKE_WRITE_WRAPPER(sh2_drc_write16_slot); MAKE_WRITE_WRAPPER(sh2_drc_write32); -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) host_dasm_new_symbol(sh2_drc_read8); host_dasm_new_symbol(sh2_drc_read16); host_dasm_new_symbol(sh2_drc_read32); @@ -2822,7 +2849,7 @@ static void sh2_generate_utils(void) #endif rcache_invalidate(); -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) host_dasm_new_symbol(sh2_drc_entry); host_dasm_new_symbol(sh2_drc_dispatcher); host_dasm_new_symbol(sh2_drc_exit); @@ -2836,48 +2863,101 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block(u16 *drcblk, u16 *p, block_desc *btab, u32 a) +static void *sh2_smc_rm_block_entry(block_desc *bd, int tcache_id) { - u16 id = *p >> 1; - block_desc *bd = btab + id; - - // FIXME: skip subblocks; do both directions - // FIXME: collect all branches - dbg(1, " killing block %08x", bd->addr); - bd->addr = bd->end_addr = 0; - - while (p > drcblk && (p[-1] >> 1) == id) - p--; - - // check for possible overlay block - if (p > 0 && p[-1] != 0) { - bd = btab + (p[-1] >> 1); - if (bd->addr <= a && a < bd->end_addr) - sh2_smc_rm_block(drcblk, p - 1, btab, a); + void *tmp; + + // XXX: kill links somehow? + dbg(2, " killing entry %08x, blkid %d", bd->addr, bd - block_tables[tcache_id]); + if (bd->addr == 0 || bd->tcache_ptr == NULL) { + dbg(1, " killing dead block!? %08x", bd->addr); + return bd->tcache_ptr; } - do { - *p++ = 0; + // since we never reuse space of dead blocks, + // insert jump to dispatcher for blocks that are linked to this point + //emith_jump_at(bd->tcache_ptr, sh2_drc_dispatcher); + + // attempt to handle self-modifying blocks by exiting at nearest known PC + tmp = tcache_ptr; + tcache_ptr = bd->tcache_ptr; + emit_move_r_imm32(SHR_PC, bd->addr); + rcache_flush(); + emith_jump(sh2_drc_dispatcher); + tcache_ptr = tmp; + + bd->addr = 0; + return bd->tcache_ptr; +} + +static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) +{ + //block_link *bl = block_links[tcache_id]; + //int bl_count = block_link_counts[tcache_id]; + block_desc *btab = block_tables[tcache_id]; + u16 *p = drc_ram_blk + ((a & mask) >> shift); + u16 *pmax = drc_ram_blk + (mask >> shift); + void *tcache_min, *tcache_max; + int zeros; + u16 *pt; + + // Figure out what the main block is, as subblocks also have the flag set. + // This relies on sub having single entry. It's possible that innocent + // block might be hit, but that's not such a big deal. + if ((p[0] >> 1) != (p[1] >> 1)) { + for (; p > drc_ram_blk; p--) + if (p[-1] == 0 || (p[-1] >> 1) == (*p >> 1)) + break; } - while ((*p >> 1) == id); + pt = p; + + for (; p > drc_ram_blk; p--) + if ((*p & 1)) + break; + + if (!(*p & 1)) { + dbg(1, "smc rm: missing block start for %08x?", a); + p = pt; + } + + if (*p == 0) + return; + + tcache_min = tcache_max = sh2_smc_rm_block_entry(&btab[*p >> 1], tcache_id); + *p = 0; + + for (p++, zeros = 0; p < pmax && zeros < MAX_LITERAL_OFFSET / 2; p++) { + int id = *p >> 1; + if (id == 0) { + // there can be holes because games sometimes keep variables + // directly in literal pool and we don't inline them to avoid recompile + // (Star Wars Arcade) + zeros++; + continue; + } + if (*p & 1) { + if (id == (p[1] >> 1)) + // hit other block + break; + tcache_max = sh2_smc_rm_block_entry(&btab[id], tcache_id); + } + *p = 0; + } + + host_instructions_updated(tcache_min, (void *)((char *)tcache_max + 4*4 + 4)); } void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid) { - u16 *drcblk = Pico32xMem->drcblk_ram; - u16 *p = drcblk + ((a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT); - - dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(drcblk, p, block_tables[0], a); + dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); + sh2_smc_rm_block(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); } void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) { - u16 *drcblk = Pico32xMem->drcblk_da[cpuid]; - u16 *p = drcblk + ((a & 0xfff) >> SH2_DRCBLK_DA_SHIFT); - - dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(drcblk, p, block_tables[1 + cpuid], a); + dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); + sh2_smc_rm_block(a, Pico32xMem->drcblk_da[cpuid], + 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); } void sh2_execute(SH2 *sh2c, int cycles) @@ -2898,12 +2978,12 @@ void sh2_execute(SH2 *sh2c, int cycles) // TODO: irq cycles ret_cycles = (signed int)sh2c->sr >> 12; if (ret_cycles > 0) - printf("warning: drc returned with cycles: %d\n", ret_cycles); + dbg(1, "warning: drc returned with cycles: %d", ret_cycles); sh2c->cycles_done += cycles - ret_cycles; } -#if (DRC_DEBUG & 1) +#if (DRC_DEBUG & 2) void block_stats(void) { int c, b, i, total = 0; @@ -2979,9 +3059,7 @@ int sh2_drc_init(SH2 *sh2) drc_cmn_init(); tcache_ptr = tcache; sh2_generate_utils(); -#ifdef ARM - cache_flush_d_inval_i(tcache, tcache_ptr); -#endif + host_instructions_updated(tcache, tcache_ptr); tcache_bases[0] = tcache_ptrs[0] = tcache_ptr; for (i = 1; i < ARRAY_SIZE(tcache_bases); i++) @@ -2990,7 +3068,7 @@ int sh2_drc_init(SH2 *sh2) // tmp PicoOpt |= POPT_DIS_VDP_FIFO; -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) for (i = 0; i < ARRAY_SIZE(block_tables); i++) tcache_dsm_ptrs[i] = tcache_bases[i]; // disasm the utils @@ -3023,7 +3101,7 @@ void sh2_drc_finish(SH2 *sh2) block_stats(); for (i = 0; i < TCACHE_BUFFERS; i++) { -#if (DRC_DEBUG & 2) +#if (DRC_DEBUG & 4) printf("~~~ tcache %d\n", i); tcache_dsm_ptrs[i] = tcache_bases[i]; tcache_ptr = tcache_ptrs[i];