X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=cpu%2Fsh2%2Fcompiler.c;h=0e8b65b39de2545589a18478445d00f9ba506243;hb=d602fd4f739acca7145b4208134da15fad2a6c6e;hp=84b9736f1af7f77d09c8c125ca9cb08f597d7275;hpb=6976a54726c9cebe755ee3803ab52517a9d23001;p=picodrive.git diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 84b9736..0e8b65b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -47,10 +47,11 @@ #define MAX_LOCAL_BRANCHES 32 // debug stuff -// 1 - warnings/errors -// 2 - block info/smc -// 4 - asm -// 8 - runtime block entry log +// 01 - warnings/errors +// 02 - block info/smc +// 04 - asm +// 08 - runtime block entry log +// 10 - smc self-check // { #ifndef DRC_DEBUG #define DRC_DEBUG 0 @@ -119,13 +120,16 @@ enum op_types { OP_BRANCH_RF, // indirect far (PC + Rm) OP_SETCLRT, // T flag set/clear OP_MOVE, // register move - OP_LOAD_POOL, // literal pool load + OP_LOAD_POOL, // literal pool load, imm is address + OP_MOVA, OP_SLEEP, OP_RTE, }; #ifdef DRC_SH2 +static int literal_disabled_frames; + #if (DRC_DEBUG & 4) static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; @@ -187,7 +191,8 @@ struct block_entry { struct block_desc { u32 addr; // block start SH2 PC address - u32 end_addr; // address after last op or literal + u16 size; // ..of recompiled insns+lit. pool + u16 size_nolit; // same without literals #if (DRC_DEBUG & 2) int refcount; #endif @@ -219,7 +224,7 @@ static const int ram_sizes[TCACHE_BUFFERS] = { 0x1000, 0x1000, }; -#define ADDR_TO_BLOCK_PAGE 0x100 +#define INVAL_PAGE_SIZE 0x100 struct block_list { struct block_desc *block; @@ -227,7 +232,7 @@ struct block_list { }; // array of pointers to block_lists for RAM and 2 data arrays -// each array has len: sizeof(mem) / ADDR_TO_BLOCK_PAGE +// each array has len: sizeof(mem) / INVAL_PAGE_SIZE static struct block_list **inval_lookup[TCACHE_BUFFERS]; static const int hash_table_sizes[TCACHE_BUFFERS] = { @@ -266,6 +271,8 @@ typedef struct { #ifdef __arm__ #include "../drc/emit_arm.c" +#ifndef __MACH__ + static const int reg_map_g2h[] = { 4, 5, 6, 7, 8, -1, -1, -1, @@ -275,6 +282,20 @@ static const int reg_map_g2h[] = { -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, }; +#else + +// no r9.. +static const int reg_map_g2h[] = { + 4, 5, 6, 7, + -1, -1, -1, -1, + -1, -1, -1, -1, + -1, -1, -1, 8, // r12 .. sp + -1, -1, -1, 10, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, + -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, +}; + +#endif + static temp_reg_t reg_temp[] = { { 0, }, { 1, }, @@ -329,7 +350,7 @@ static u32 REGPARM(2) (*sh2_drc_read16)(u32 a, SH2 *sh2); static u32 REGPARM(2) (*sh2_drc_read32)(u32 a, SH2 *sh2); static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); -static int REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); +static void REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); // address space stuff static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) @@ -343,6 +364,7 @@ static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) } else if ((a & 0xfffff000) == 0xc0000000) { // data array + // FIXME: access sh2->data_array instead poffs = offsetof(SH2, p_da); *mask = 0xfff; } @@ -398,7 +420,7 @@ static void add_to_block_list(struct block_list **blist, struct block_desc *bloc static void rm_from_block_list(struct block_list **blist, struct block_desc *block) { struct block_list *prev = NULL, *current = *blist; - for (; current != NULL; prev = current, current = current->next) { + for (; current != NULL; current = current->next) { if (current->block == block) { if (prev == NULL) *blist = current->next; @@ -407,9 +429,10 @@ static void rm_from_block_list(struct block_list **blist, struct block_desc *blo free(current); return; } + prev = current; } dbg(1, "can't rm block %p (%08x-%08x)", - block, block->addr, block->end_addr); + block, block->addr, block->addr + block->size); } static void rm_block_list(struct block_list **blist) @@ -448,7 +471,7 @@ static void REGPARM(1) flush_tcache(int tcid) tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; #endif - for (i = 0; i < ram_sizes[tcid] / ADDR_TO_BLOCK_PAGE; i++) + for (i = 0; i < ram_sizes[tcid] / INVAL_PAGE_SIZE; i++) rm_block_list(&inval_lookup[tcid][i]); } @@ -493,7 +516,31 @@ missing: dbg(1, "rm_from_hashlist: be %p %08x missing?", be, be->pc); } -static struct block_desc *dr_add_block(u32 addr, u32 end_addr, int is_slave, int *blk_id) +static void unregister_links(struct block_entry *be, int tcache_id) +{ + struct block_link *bl_unresolved = unresolved_links[tcache_id]; + struct block_link *bl, *bl_next; + + for (bl = be->links; bl != NULL; ) { + bl_next = bl->next; + bl->next = bl_unresolved; + bl_unresolved = bl; + bl = bl_next; + } + be->links = NULL; + unresolved_links[tcache_id] = bl_unresolved; +} + +// unlike sh2_smc_rm_block, the block stays and can still be accessed +// by other already directly linked blocks, just not preferred +static void kill_block_entry(struct block_entry *be, int tcache_id) +{ + rm_from_hashlist(be, tcache_id); + unregister_links(be, tcache_id); +} + +static struct block_desc *dr_add_block(u32 addr, u16 size_lit, + u16 size_nolit, int is_slave, int *blk_id) { struct block_entry *be; struct block_desc *bd; @@ -502,8 +549,10 @@ static struct block_desc *dr_add_block(u32 addr, u32 end_addr, int is_slave, int // do a lookup to get tcache_id and override check be = dr_get_entry(addr, is_slave, &tcache_id); - if (be != NULL) - dbg(1, "block override for %08x", addr); + if (be != NULL) { + dbg(1, "block override for %08x, was %p", addr, be->tcache_ptr); + kill_block_entry(be, tcache_id); + } bcount = &block_counts[tcache_id]; if (*bcount >= block_max_counts[tcache_id]) { @@ -513,7 +562,8 @@ static struct block_desc *dr_add_block(u32 addr, u32 end_addr, int is_slave, int bd = &block_tables[tcache_id][*bcount]; bd->addr = addr; - bd->end_addr = end_addr; + bd->size = size_lit; + bd->size_nolit = size_nolit; bd->entry_count = 1; bd->entryp[0].pc = addr; @@ -630,11 +680,11 @@ static void dr_link_blocks(struct block_entry *be, int tcache_id) } #define ADD_TO_ARRAY(array, count, item, failcode) \ - array[count++] = item; \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ failcode; \ - } + } \ + array[count++] = item; static int find_in_array(u32 *array, size_t size, u32 what) { @@ -994,7 +1044,8 @@ static void rcache_unlock_all(void) reg_temp[i].flags &= ~HRF_LOCKED; } -static inline u32 rcache_used_hreg_mask(void) +#ifdef DRC_CMP +static u32 rcache_used_hreg_mask(void) { u32 mask = 0; int i; @@ -1005,6 +1056,7 @@ static inline u32 rcache_used_hreg_mask(void) return mask; } +#endif static void rcache_clean(void) { @@ -1087,8 +1139,11 @@ static void emit_or_t_if_eq(int srr) // reg cache must be clean before call static int emit_memhandler_read_(int size, int ram_check) { - int arg0, arg1; + int arg1; +#if 0 + int arg0; host_arg2reg(arg0, 0); +#endif rcache_clean(); @@ -1100,7 +1155,8 @@ static int emit_memhandler_read_(int size, int ram_check) arg1 = rcache_get_tmp_arg(1); emith_move_r_r(arg1, CONTEXT_REG); -#ifndef PDB_NET +#if 0 // can't do this because of unmapped reads + // ndef PDB_NET if (ram_check && Pico.rom == (void *)0x02000000 && Pico32xMem->sdram == (void *)0x06000000) { int tmp = rcache_get_tmp(); emith_and_r_r_imm(tmp, arg0, 0xfb000000); @@ -1204,7 +1260,7 @@ static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int siz return hr2; } -static void emit_memhandler_write(int size, u32 pc) +static void emit_memhandler_write(int size) { int ctxr; host_arg2reg(ctxr, 2); @@ -1348,6 +1404,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; + u32 literals_disabled:1; } drcf = { 0, }; // PC of current, first, last SH2 insn @@ -1365,6 +1422,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) int op; base_pc = sh2->pc; + drcf.literals_disabled = literal_disabled_frames != 0; // get base/validate PC dr_pc_base = dr_get_pc_base(base_pc, sh2->is_slave); @@ -1386,7 +1444,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // initial passes to disassemble and analyze the block scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, &end_literals); - block = dr_add_block(base_pc, end_literals, sh2->is_slave, &blkid_main); + if (drcf.literals_disabled) + end_literals = end_pc; + + block = dr_add_block(base_pc, end_literals - base_pc, + end_pc - base_pc, sh2->is_slave, &blkid_main); if (block == NULL) return NULL; @@ -1445,13 +1507,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // make block entry v = block->entry_count; - if (v < ARRAY_SIZE(block->entryp)) { + if (v < ARRAY_SIZE(block->entryp)) + { + struct block_entry *be_old; + block->entryp[v].pc = pc; block->entryp[v].tcache_ptr = tcache_ptr; block->entryp[v].links = NULL; #if (DRC_DEBUG & 2) block->entryp[v].block = block; #endif + be_old = dr_get_entry(pc, sh2->is_slave, &tcache_id); + if (be_old != NULL) { + dbg(1, "entry override for %08x, was %p", pc, be_old->tcache_ptr); + kill_block_entry(be_old, tcache_id); + } + add_to_hashlist(&block->entryp[v], tcache_id); block->entry_count++; @@ -1479,6 +1550,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emit_move_r_imm32(SHR_PC, pc); rcache_clean(); +#if (DRC_DEBUG & 0x10) + rcache_get_reg_arg(0, SHR_PC); + tmp = emit_memhandler_read(2); + tmp2 = rcache_get_tmp(); + tmp3 = rcache_get_tmp(); + emith_move_r_imm(tmp2, FETCH32(pc)); + emith_move_r_imm(tmp3, 0); + emith_cmp_r_r(tmp, tmp2); + EMITH_SJMP_START(DCOND_EQ); + emith_read_r_r_offs_c(DCOND_NE, tmp3, tmp3, 0); // crash + EMITH_SJMP_END(DCOND_EQ); + rcache_free_tmp(tmp); + rcache_free_tmp(tmp2); + rcache_free_tmp(tmp3); +#endif + // check cycles sr = rcache_get_reg(SHR_SR, RC_GR_READ); emith_cmp_r_imm(sr, 0); @@ -1519,9 +1606,35 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW); DELAY_SAVE_T(sr); } - if (delay_dep_fw & ~BITMASK1(SHR_T)) - dbg(1, "unhandled delay_dep_fw: %x", delay_dep_fw & ~BITMASK1(SHR_T)); - if (delay_dep_bk) + if (delay_dep_bk & BITMASK1(SHR_PC)) { + if (opd->op != OP_LOAD_POOL && opd->op != OP_MOVA) { + // can only be those 2 really.. + elprintf_sh2(sh2, EL_ANOMALY, + "drc: illegal slot insn %04x @ %08x?", op, pc - 2); + } + if (opd->imm != 0) + ; // addr already resolved somehow + else { + switch (ops[i-1].op) { + case OP_BRANCH: + emit_move_r_imm32(SHR_PC, ops[i-1].imm); + break; + case OP_BRANCH_CT: + case OP_BRANCH_CF: + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); + sr = rcache_get_reg(SHR_SR, RC_GR_READ); + emith_move_r_imm(tmp, pc); + emith_tst_r_imm(sr, T); + tmp2 = ops[i-1].op == OP_BRANCH_CT ? DCOND_NE : DCOND_EQ; + emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm); + break; + // case OP_BRANCH_R OP_BRANCH_RF - PC already loaded + } + } + } + //if (delay_dep_fw & ~BITMASK1(SHR_T)) + // dbg(1, "unhandled delay_dep_fw: %x", delay_dep_fw & ~BITMASK1(SHR_T)); + if (delay_dep_bk & ~BITMASK2(SHR_PC, SHR_PR)) dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk); } @@ -1575,7 +1688,56 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_add_r_imm(tmp, 4*2); drcf.test_irq = 1; drcf.pending_branch_indirect = 1; - break; + goto end_op; + + case OP_LOAD_POOL: +#if PROPAGATE_CONSTANTS + if (opd->imm != 0 && opd->imm < end_literals + && literal_addr_count < MAX_LITERALS) + { + ADD_TO_ARRAY(literal_addr, literal_addr_count, opd->imm,); + if (opd->size == 2) + tmp = FETCH32(opd->imm); + else + tmp = (u32)(int)(signed short)FETCH_OP(opd->imm); + gconst_new(GET_Rn(), tmp); + } + else +#endif + { + tmp = rcache_get_tmp_arg(0); + if (opd->imm != 0) + emith_move_r_imm(tmp, opd->imm); + else { + // have to calculate read addr from PC + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + if (opd->size == 2) { + emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); + emith_bic_r_imm(tmp, 3); + } + else + emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 2); + } + tmp2 = emit_memhandler_read(opd->size); + tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + if (opd->size == 2) + emith_move_r_r(tmp3, tmp2); + else + emith_sext(tmp3, tmp2, 16); + rcache_free_tmp(tmp2); + } + goto end_op; + + case OP_MOVA: + if (opd->imm != 0) + emit_move_r_imm32(SHR_R0, opd->imm); + else { + tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE); + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); + emith_bic_r_imm(tmp, 3); + } + goto end_op; } switch ((op >> 12) & 0x0f) @@ -1613,7 +1775,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg_arg(0, SHR_R0); tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ); emith_add_r_r(tmp2, tmp3); - emit_memhandler_write(op & 3, pc); + emit_memhandler_write(op & 3); goto end_op; case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 @@ -1732,7 +1894,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg_arg(1, GET_Rm()); if (op & 0x0f) emith_add_r_imm(tmp, (op & 0x0f) * 4); - emit_memhandler_write(2, pc); + emit_memhandler_write(2); goto end_op; case 0x02: @@ -1744,17 +1906,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_clean(); rcache_get_reg_arg(0, GET_Rn()); rcache_get_reg_arg(1, GET_Rm()); - emit_memhandler_write(op & 3, pc); + emit_memhandler_write(op & 3); goto end_op; - case 0x04: // MOV.B Rm,@–Rn 0010nnnnmmmm0100 - case 0x05: // MOV.W Rm,@–Rn 0010nnnnmmmm0101 - case 0x06: // MOV.L Rm,@–Rn 0010nnnnmmmm0110 + case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 + case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 + case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 + rcache_get_reg_arg(1, GET_Rm()); // for Rm == Rn tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); emith_sub_r_imm(tmp, (1 << (op & 3))); rcache_clean(); rcache_get_reg_arg(0, GET_Rn()); - rcache_get_reg_arg(1, GET_Rm()); - emit_memhandler_write(op & 3, pc); + emit_memhandler_write(op & 3); goto end_op; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 sr = rcache_get_reg(SHR_SR, RC_GR_RMW); @@ -1805,13 +1967,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) sr = rcache_get_reg(SHR_SR, RC_GR_RMW); emith_bic_r_imm(sr, T); emith_tst_r_imm(tmp, 0x000000ff); - emit_or_t_if_eq(tmp); + emit_or_t_if_eq(sr); emith_tst_r_imm(tmp, 0x0000ff00); - emit_or_t_if_eq(tmp); + emit_or_t_if_eq(sr); emith_tst_r_imm(tmp, 0x00ff0000); - emit_or_t_if_eq(tmp); + emit_or_t_if_eq(sr); emith_tst_r_imm(tmp, 0xff000000); - emit_or_t_if_eq(tmp); + emit_or_t_if_eq(sr); rcache_free_tmp(tmp); goto end_op; case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101 @@ -1985,7 +2147,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case 1: // DT Rn 0100nnnn00010000 sr = rcache_get_reg(SHR_SR, RC_GR_RMW); -#ifndef DRC_CMP +#if 0 // scheduling needs tuning if (FETCH_OP(pc) == 0x8bfd) { // BF #-2 if (gconst_get(GET_Rn(), &tmp)) { // XXX: limit burned cycles @@ -2035,22 +2197,22 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x03: switch (op & 0x3f) { - case 0x02: // STS.L MACH,@–Rn 0100nnnn00000010 + case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010 tmp = SHR_MACH; break; - case 0x12: // STS.L MACL,@–Rn 0100nnnn00010010 + case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010 tmp = SHR_MACL; break; - case 0x22: // STS.L PR,@–Rn 0100nnnn00100010 + case 0x22: // STS.L PR,@-Rn 0100nnnn00100010 tmp = SHR_PR; break; - case 0x03: // STC.L SR,@–Rn 0100nnnn00000011 + case 0x03: // STC.L SR,@-Rn 0100nnnn00000011 tmp = SHR_SR; break; - case 0x13: // STC.L GBR,@–Rn 0100nnnn00010011 + case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011 tmp = SHR_GBR; break; - case 0x23: // STC.L VBR,@–Rn 0100nnnn00100011 + case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011 tmp = SHR_VBR; break; default: @@ -2063,7 +2225,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp3 = rcache_get_reg_arg(1, tmp); if (tmp == SHR_SR) emith_clear_msb(tmp3, tmp3, 22); // reserved bits defined by ISA as 0 - emit_memhandler_write(2, pc); + emit_memhandler_write(2); goto end_op; case 0x04: case 0x05: @@ -2203,7 +2365,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_move_r_r(tmp2, tmp); rcache_free_tmp(tmp); rcache_get_reg_arg(0, GET_Rn()); - emit_memhandler_write(0, pc); + emit_memhandler_write(0); break; default: goto default_; @@ -2363,7 +2525,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp3 = (op & 0x100) >> 8; if (op & 0x0f) emith_add_r_imm(tmp, (op & 0x0f) << tmp3); - emit_memhandler_write(tmp3, pc); + emit_memhandler_write(tmp3); goto end_op; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd @@ -2384,27 +2546,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto default_; - ///////////////////////////////////////////// - case 0x09: - // MOV.W @(disp,PC),Rn 1001nnnndddddddd - tmp = pc + (op & 0xff) * 2 + 2; -#if PROPAGATE_CONSTANTS - if (tmp < end_pc + MAX_LITERAL_OFFSET && literal_addr_count < MAX_LITERALS) { - ADD_TO_ARRAY(literal_addr, literal_addr_count, tmp,); - gconst_new(GET_Rn(), (u32)(int)(signed short)FETCH_OP(tmp)); - } - else -#endif - { - tmp2 = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp2, tmp); - tmp2 = emit_memhandler_read(1); - tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - emith_sext(tmp3, tmp2, 16); - rcache_free_tmp(tmp2); - } - goto end_op; - ///////////////////////////////////////////// case 0x0c: switch (op & 0x0f00) @@ -2417,7 +2558,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg_arg(1, SHR_R0); tmp3 = (op & 0x300) >> 8; emith_add_r_imm(tmp, (op & 0xff) << tmp3); - emit_memhandler_write(tmp3, pc); + emit_memhandler_write(tmp3); goto end_op; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd @@ -2433,21 +2574,18 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_add_r_imm(tmp, 4); tmp = rcache_get_reg_arg(1, SHR_SR); emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2, pc); + emit_memhandler_write(2); // push PC rcache_get_reg_arg(0, SHR_SP); tmp = rcache_get_tmp_arg(1); emith_move_r_imm(tmp, pc); - emit_memhandler_write(2, pc); + emit_memhandler_write(2); // obtain new PC emit_memhandler_read_rr(SHR_PC, SHR_VBR, (op & 0xff) * 4, 2); // indirect jump -> back to dispatcher rcache_flush(); emith_jump(sh2_drc_dispatcher); goto end_op; - case 0x0700: // MOVA @(disp,PC),R0 11000111dddddddd - emit_move_r_imm32(SHR_R0, (pc + (op & 0xff) * 4 + 2) & ~3); - goto end_op; case 0x0800: // TST #imm,R0 11001000iiiiiiii tmp = rcache_get_reg(SHR_R0, RC_GR_READ); sr = rcache_get_reg(SHR_SR, RC_GR_RMW); @@ -2493,32 +2631,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) tmp3 = rcache_get_reg_arg(0, SHR_GBR); tmp4 = rcache_get_reg(SHR_R0, RC_GR_READ); emith_add_r_r(tmp3, tmp4); - emit_memhandler_write(0, pc); + emit_memhandler_write(0); goto end_op; } goto default_; - ///////////////////////////////////////////// - case 0x0d: - // MOV.L @(disp,PC),Rn 1101nnnndddddddd - tmp = (pc + (op & 0xff) * 4 + 2) & ~3; -#if PROPAGATE_CONSTANTS - if (tmp < end_pc + MAX_LITERAL_OFFSET && literal_addr_count < MAX_LITERALS) { - ADD_TO_ARRAY(literal_addr, literal_addr_count, tmp,); - gconst_new(GET_Rn(), FETCH32(tmp)); - } - else -#endif - { - tmp2 = rcache_get_tmp_arg(0); - emith_move_r_imm(tmp2, tmp); - tmp2 = emit_memhandler_read(2); - tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - emith_move_r_r(tmp3, tmp2); - rcache_free_tmp(tmp2); - } - goto end_op; - ///////////////////////////////////////////// case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii @@ -2527,8 +2644,34 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: default_: - elprintf(EL_ANOMALY, "%csh2 drc: unhandled op %04x @ %08x", - sh2->is_slave ? 's' : 'm', op, pc - 2); + if (!(op_flags[i] & OF_B_IN_DS)) + elprintf_sh2(sh2, EL_ANOMALY, + "drc: illegal op %04x @ %08x", op, pc - 2); + + tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); + emith_sub_r_imm(tmp, 4*2); + // push SR + tmp = rcache_get_reg_arg(0, SHR_SP); + emith_add_r_imm(tmp, 4); + tmp = rcache_get_reg_arg(1, SHR_SR); + emith_clear_msb(tmp, tmp, 22); + emit_memhandler_write(2); + // push PC + rcache_get_reg_arg(0, SHR_SP); + tmp = rcache_get_tmp_arg(1); + if (drcf.pending_branch_indirect) { + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + emith_move_r_r(tmp, tmp2); + } + else + emith_move_r_imm(tmp, pc - 2); + emit_memhandler_write(2); + // obtain new PC + v = (op_flags[i] & OF_B_IN_DS) ? 6 : 4; + emit_memhandler_read_rr(SHR_PC, SHR_VBR, v * 4, 2); + // indirect jump -> back to dispatcher + rcache_flush(); + emith_jump(sh2_drc_dispatcher); break; } @@ -2546,6 +2689,8 @@ end_op: if (drcf.test_irq && !drcf.pending_branch_direct) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW); FLUSH_CYCLES(sr); + if (!drcf.pending_branch_indirect) + emit_move_r_imm32(SHR_PC, pc); rcache_flush(); emith_call(sh2_drc_test_irq); drcf.test_irq = 0; @@ -2667,7 +2812,8 @@ end_op: // mark memory blocks as containing compiled code // override any overlay blocks as they become unreachable anyway - if (tcache_id != 0 || (block->addr & 0xc7fc0000) == 0x06000000) + if ((block->addr & 0xc7fc0000) == 0x06000000 + || (block->addr & 0xfffff000) == 0xc0000000) { u16 *drc_ram_blk = NULL; u32 addr, mask = 0, shift = 0; @@ -2678,7 +2824,7 @@ end_op: shift = SH2_DRCBLK_DA_SHIFT; mask = 0xfff; } - else if ((block->addr & 0xc7fc0000) == 0x06000000) { + else { // SDRAM drc_ram_blk = Pico32xMem->drcblk_ram; shift = SH2_DRCBLK_RAM_SHIFT; @@ -2697,9 +2843,9 @@ end_op: } // add to invalidation lookup lists - addr = base_pc & ~(ADDR_TO_BLOCK_PAGE - 1); - for (; addr < end_literals; addr += ADDR_TO_BLOCK_PAGE) { - i = (addr & mask) / ADDR_TO_BLOCK_PAGE; + addr = base_pc & ~(INVAL_PAGE_SIZE - 1); + for (; addr < end_literals; addr += INVAL_PAGE_SIZE) { + i = (addr & mask) / INVAL_PAGE_SIZE; add_to_block_list(&inval_lookup[tcache_id][i], block); } } @@ -2709,6 +2855,9 @@ end_op: host_instructions_updated(block_entry_ptr, tcache_ptr); do_host_disasm(tcache_id); + + if (drcf.literals_disabled && literal_addr_count) + dbg(1, "literals_disabled && literal_addr_count?"); dbg(2, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f", tcache_id, blkid_main, tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id], @@ -2895,28 +3044,28 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram_mask) +static void sh2_smc_rm_block(struct block_desc *bd, int tcache_id, u32 ram_mask) { - struct block_link *bl, *bl_next, *bl_unresolved; + u32 i, addr, end_addr; void *tmp; - u32 i, addr; - dbg(2, " killing entry %08x-%08x, blkid %d,%d", - bd->addr, bd->end_addr, tcache_id, bd - block_tables[tcache_id]); + dbg(2, " killing block %08x-%08x-%08x, blkid %d,%d", + bd->addr, bd->addr + bd->size_nolit, bd->addr + bd->size, + tcache_id, bd - block_tables[tcache_id]); if (bd->addr == 0 || bd->entry_count == 0) { dbg(1, " killing dead block!? %08x", bd->addr); return; } // remove from inval_lookup - addr = bd->addr & ~(ADDR_TO_BLOCK_PAGE - 1); - for (; addr < bd->end_addr; addr += ADDR_TO_BLOCK_PAGE) { - i = (addr & ram_mask) / ADDR_TO_BLOCK_PAGE; + addr = bd->addr & ~(INVAL_PAGE_SIZE - 1); + end_addr = bd->addr + bd->size; + for (; addr < end_addr; addr += INVAL_PAGE_SIZE) { + i = (addr & ram_mask) / INVAL_PAGE_SIZE; rm_from_block_list(&inval_lookup[tcache_id][i], bd); } tmp = tcache_ptr; - bl_unresolved = unresolved_links[tcache_id]; // remove from hash table, make incoming links unresolved // XXX: maybe patch branches w/flush instead? @@ -2926,44 +3075,55 @@ static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram // since we never reuse tcache space of dead blocks, // insert jump to dispatcher for blocks that are linked to this tcache_ptr = bd->entryp[i].tcache_ptr; - emit_move_r_imm32(SHR_PC, bd->addr); + emit_move_r_imm32(SHR_PC, bd->entryp[i].pc); rcache_flush(); emith_jump(sh2_drc_dispatcher); host_instructions_updated(bd->entryp[i].tcache_ptr, tcache_ptr); - for (bl = bd->entryp[i].links; bl != NULL; ) { - bl_next = bl->next; - bl->next = bl_unresolved; - bl_unresolved = bl; - bl = bl_next; - } + unregister_links(&bd->entryp[i], tcache_id); } tcache_ptr = tmp; - unresolved_links[tcache_id] = bl_unresolved; - bd->addr = bd->end_addr = 0; + bd->addr = bd->size = bd->size_nolit = 0; bd->entry_count = 0; } -static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) +/* +04205:243: == msh2 block #0,200 060017a8-060017f0 -> 0x27cb9c + 060017a8 d11c MOV.L @($70,PC),R1 ; @$0600181c + +04230:261: msh2 xsh w32 [260017a8] d225e304 +04230:261: msh2 smc check @260017a8 +04239:226: = ssh2 enter 060017a8 0x27cb9c, c=173 +*/ +static void sh2_smc_rm_blocks(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) { struct block_list **blist = NULL, *entry; - u32 from = ~0, to = 0; struct block_desc *block; + u32 start_addr, end_addr, taddr, i; + u32 from = ~0, to = 0; + + // ignore cache-through + a &= ~0x20000000; - blist = &inval_lookup[tcache_id][(a & mask) / ADDR_TO_BLOCK_PAGE]; + blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; entry = *blist; while (entry != NULL) { block = entry->block; - if (block->addr <= a && a < block->end_addr) { - if (block->addr < from) - from = block->addr; - if (block->end_addr > to) - to = block->end_addr; - - sh2_smc_rm_block_entry(block, tcache_id, mask); + start_addr = block->addr & ~0x20000000; + end_addr = start_addr + block->size; + if (start_addr <= a && a < end_addr) { + // get addr range that includes all removed blocks + if (from > start_addr) + from = start_addr; + if (to < end_addr) + to = end_addr; + + sh2_smc_rm_block(block, tcache_id, mask); + if (a >= start_addr + block->size_nolit) + literal_disabled_frames = 3; // entry lost, restart search entry = *blist; @@ -2972,16 +3132,29 @@ static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, entry = entry->next; } - // update range to not clear still alive blocks - for (entry = *blist; entry != NULL; entry = entry->next) { - block = entry->block; - if (block->addr > a) { - if (to > block->addr) - to = block->addr; - } - else { - if (from < block->end_addr) - from = block->end_addr; + if (from >= to) + return; + + // update range around a to match latest state + from &= ~(INVAL_PAGE_SIZE - 1); + to |= (INVAL_PAGE_SIZE - 1); + for (taddr = from; taddr < to; taddr += INVAL_PAGE_SIZE) { + i = (taddr & mask) / INVAL_PAGE_SIZE; + entry = inval_lookup[tcache_id][i]; + + for (; entry != NULL; entry = entry->next) { + block = entry->block; + + start_addr = block->addr & ~0x20000000; + if (start_addr > a) { + if (to > start_addr) + to = start_addr; + } + else { + end_addr = start_addr + block->size; + if (from < end_addr) + from = end_addr; + } } } @@ -2995,22 +3168,20 @@ static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid) { dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); + sh2_smc_rm_blocks(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); } void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) { dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(a, Pico32xMem->drcblk_da[cpuid], + sh2_smc_rm_blocks(a, Pico32xMem->drcblk_da[cpuid], 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); } -int sh2_execute(SH2 *sh2c, int cycles) +int sh2_execute_drc(SH2 *sh2c, int cycles) { int ret_cycles; - sh2c->cycles_timeslice = cycles; - // cycles are kept in SHR_SR unused bits (upper 20) // bit11 contains T saved for delay slot // others are usual SH2 flags @@ -3023,7 +3194,8 @@ int sh2_execute(SH2 *sh2c, int cycles) if (ret_cycles > 0) dbg(1, "warning: drc returned with cycles: %d", ret_cycles); - return sh2c->cycles_timeslice - ret_cycles; + sh2c->sr &= 0x3f3; + return ret_cycles; } #if (DRC_DEBUG & 2) @@ -3075,12 +3247,18 @@ void sh2_drc_flush_all(void) void sh2_drc_mem_setup(SH2 *sh2) { // fill the convenience pointers - sh2->p_bios = sh2->is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m; - sh2->p_da = Pico32xMem->data_array[sh2->is_slave]; + sh2->p_bios = sh2->is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; + sh2->p_da = sh2->data_array; sh2->p_sdram = Pico32xMem->sdram; sh2->p_rom = Pico.rom; } +void sh2_drc_frame(void) +{ + if (literal_disabled_frames > 0) + literal_disabled_frames--; +} + int sh2_drc_init(SH2 *sh2) { int i; @@ -3097,7 +3275,7 @@ int sh2_drc_init(SH2 *sh2) if (block_link_pool[i] == NULL) goto fail; - inval_lookup[i] = calloc(ram_sizes[i] / ADDR_TO_BLOCK_PAGE, + inval_lookup[i] = calloc(ram_sizes[i] / INVAL_PAGE_SIZE, sizeof(inval_lookup[0])); if (inval_lookup[i] == NULL) goto fail; @@ -3183,12 +3361,12 @@ static void *dr_get_pc_base(u32 pc, int is_slave) if ((pc & ~0x7ff) == 0) { // BIOS - ret = is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m; + ret = is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; mask = 0x7ff; } else if ((pc & 0xfffff000) == 0xc0000000) { // data array - ret = Pico32xMem->data_array[is_slave]; + ret = sh2s[is_slave].data_array; mask = 0xfff; } else if ((pc & 0xc6000000) == 0x06000000) { @@ -3198,7 +3376,8 @@ static void *dr_get_pc_base(u32 pc, int is_slave) } else if ((pc & 0xc6000000) == 0x02000000) { // ROM - ret = Pico.rom; + if ((pc & 0x3fffff) < Pico.romsize) + ret = Pico.rom; mask = 0x3fffff; } @@ -3214,6 +3393,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, u16 *dr_pc_base; u32 pc, op, tmp; u32 end_pc, end_literals = 0; + u32 lowest_mova = 0; struct op_data *opd; int next_is_delay = 0; int end_block = 0; @@ -3413,9 +3593,9 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK1(GET_Rm()); opd->source = BITMASK1(GET_Rn()); break; - case 0x04: // MOV.B Rm,@–Rn 0010nnnnmmmm0100 - case 0x05: // MOV.W Rm,@–Rn 0010nnnnmmmm0101 - case 0x06: // MOV.L Rm,@–Rn 0010nnnnmmmm0110 + case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 + case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 + case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 opd->source = BITMASK2(GET_Rm(), GET_Rn()); opd->dest = BITMASK1(GET_Rn()); break; @@ -3533,24 +3713,24 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x03: switch (op & 0x3f) { - case 0x02: // STS.L MACH,@–Rn 0100nnnn00000010 + case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010 tmp = SHR_MACH; break; - case 0x12: // STS.L MACL,@–Rn 0100nnnn00010010 + case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010 tmp = SHR_MACL; break; - case 0x22: // STS.L PR,@–Rn 0100nnnn00100010 + case 0x22: // STS.L PR,@-Rn 0100nnnn00100010 tmp = SHR_PR; break; - case 0x03: // STC.L SR,@–Rn 0100nnnn00000011 + case 0x03: // STC.L SR,@-Rn 0100nnnn00000011 tmp = SHR_SR; opd->cycles = 2; break; - case 0x13: // STC.L GBR,@–Rn 0100nnnn00010011 + case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011 tmp = SHR_GBR; opd->cycles = 2; break; - case 0x23: // STC.L VBR,@–Rn 0100nnnn00100011 + case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011 tmp = SHR_VBR; opd->cycles = 2; break; @@ -3729,13 +3909,16 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->source = BITMASK1(GET_Rm()); opd->dest |= BITMASK1(GET_Rn()); break; + case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 + opd->source = BITMASK2(GET_Rm(), SHR_T); + opd->dest = BITMASK2(GET_Rn(), SHR_T); + break; case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 opd->op = OP_MOVE; goto arith_rmrn; case 0x07: // NOT Rm,Rn 0110nnnnmmmm0111 case 0x08: // SWAP.B Rm,Rn 0110nnnnmmmm1000 case 0x09: // SWAP.W Rm,Rn 0110nnnnmmmm1001 - case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011 case 0x0c: // EXTU.B Rm,Rn 0110nnnnmmmm1100 case 0x0d: // EXTU.W Rm,Rn 0110nnnnmmmm1101 @@ -3805,9 +3988,17 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x09: // MOV.W @(disp,PC),Rn 1001nnnndddddddd opd->op = OP_LOAD_POOL; + tmp = pc + 2; + if (op_flags[i] & OF_DELAY_OP) { + if (ops[i-1].op == OP_BRANCH) + tmp = ops[i-1].imm; + else + tmp = 0; + } opd->source = BITMASK1(SHR_PC); opd->dest = BITMASK1(GET_Rn()); - opd->imm = pc + 4 + (op & 0xff) * 2; + if (tmp) + opd->imm = tmp + 2 + (op & 0xff) * 2; opd->size = 1; break; @@ -3855,8 +4046,22 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, end_block = 1; // FIXME break; case 0x0700: // MOVA @(disp,PC),R0 11000111dddddddd + opd->op = OP_MOVA; + tmp = pc + 2; + if (op_flags[i] & OF_DELAY_OP) { + if (ops[i-1].op == OP_BRANCH) + tmp = ops[i-1].imm; + else + tmp = 0; + } opd->dest = BITMASK1(SHR_R0); - opd->imm = (pc + 4 + (op & 0xff) * 4) & ~3; + if (tmp) { + opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3; + if (opd->imm >= base_pc) { + if (lowest_mova == 0 || opd->imm < lowest_mova) + lowest_mova = opd->imm; + } + } break; case 0x0800: // TST #imm,R0 11001000iiiiiiii opd->source = BITMASK1(SHR_R0); @@ -3897,9 +4102,17 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0d: // MOV.L @(disp,PC),Rn 1101nnnndddddddd opd->op = OP_LOAD_POOL; + tmp = pc + 2; + if (op_flags[i] & OF_DELAY_OP) { + if (ops[i-1].op == OP_BRANCH) + tmp = ops[i-1].imm; + else + tmp = 0; + } opd->source = BITMASK1(SHR_PC); opd->dest = BITMASK1(GET_Rn()); - opd->imm = (pc + 4 + (op & 0xff) * 2) & ~3; + if (tmp) + opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3; opd->size = 2; break; @@ -3916,6 +4129,22 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, is_slave ? 's' : 'm', op, pc); break; } + + if (op_flags[i] & OF_DELAY_OP) { + switch (opd->op) { + case OP_BRANCH: + case OP_BRANCH_CT: + case OP_BRANCH_CF: + case OP_BRANCH_R: + case OP_BRANCH_RF: + elprintf(EL_ANOMALY, "%csh2 drc: branch in DS @ %08x", + is_slave ? 's' : 'm', pc); + opd->op = OP_UNHANDLED; + op_flags[i] |= OF_B_IN_DS; + next_is_delay = 0; + break; + } + } } i_end = i; end_pc = pc; @@ -3958,6 +4187,20 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (end_literals < end_pc) end_literals = end_pc; + // end_literals is used to decide to inline a literal or not + // XXX: need better detection if this actually is used in write + if (lowest_mova >= base_pc) { + if (lowest_mova < end_literals) { + dbg(1, "mova for %08x, block %08x", lowest_mova, base_pc); + end_literals = end_pc; + } + if (lowest_mova < end_pc) { + dbg(1, "warning: mova inside of blk for %08x, block %08x", + lowest_mova, base_pc); + end_literals = end_pc; + } + } + *end_pc_out = end_pc; if (end_literals_out != NULL) *end_literals_out = end_literals;