From 68e50296e675d3f5a6e23a26029a64930cdea7dc Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 27 May 2024 23:53:26 +0200 Subject: [PATCH] sh2 drc, small fixes (cycle counting, invalidation) --- Makefile | 2 +- cpu/drc/emit_arm.c | 13 +++--- cpu/drc/emit_x86.c | 3 +- cpu/sh2/compiler.c | 87 ++++++++++++++++++++++------------------ platform/common/disarm.c | 9 +++++ 5 files changed, 67 insertions(+), 47 deletions(-) diff --git a/Makefile b/Makefile index 99c33916..83523895 100644 --- a/Makefile +++ b/Makefile @@ -102,7 +102,7 @@ asm_32xmemory ?= 1 else use_fame ?= 1 use_cz80 ?= 1 -ifneq (,$(filter x86% i386% mips% aarch% riscv% powerpc% ppc%, $(ARCH))) +ifneq (,$(filter x86% i386% i686% mips% aarch% riscv% powerpc% ppc%, $(ARCH))) use_sh2drc ?= 1 endif endif diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index ed4732f6..454ff87b 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1329,9 +1329,10 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) int t2 = rcache_get_tmp(); \ int t3 = rcache_get_tmp(); \ /* if (sr < 0) return */ \ - emith_asrf(t2, sr, 12); \ + emith_cmp_r_imm(sr, 0); \ EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ @@ -1362,13 +1363,11 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) } while (0) #define emith_carry_to_t(srr, is_sub) do { \ - if (is_sub) { /* has inverted C on ARM */ \ + emith_bic_r_imm(srr, 1); \ + if (is_sub) /* has inverted C on ARM */ \ emith_or_r_imm_c(A_COND_CC, srr, 1); \ - emith_bic_r_imm_c(A_COND_CS, srr, 1); \ - } else { \ + else \ emith_or_r_imm_c(A_COND_CS, srr, 1); \ - emith_bic_r_imm_c(A_COND_CC, srr, 1); \ - } \ } while (0) #define emith_t_to_carry(srr, is_sub) do { \ @@ -1494,7 +1493,7 @@ static void emith_sync_t(int sr) else if (tcond == A_COND_NV) emith_bic_r_imm(sr, T); else if (tcond >= 0) { - emith_bic_r_imm_c(emith_invert_cond(tcond),sr, T); + emith_bic_r_imm(sr, T); emith_or_r_imm_c(tcond, sr, T); } tcond = -1; diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 6fbe5abf..6a815e85 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1269,9 +1269,10 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common if (t3 == xAX) { t3 = t1; t1 = xAX; } /* for MUL */ \ if (t3 == xDX) { t3 = t2; t2 = xDX; } \ /* if (sr < 0) return */ \ - emith_asrf(t2, sr, 12); \ + emith_cmp_r_imm(sr, 0); \ EMITH_JMP_START(DCOND_LE); \ /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles)) + 1); \ emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ rcache_free_tmp(t3); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index d7ae8960..b87f2b9b 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -260,7 +260,7 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) printf("trace eof at %08lx\n",ftell(trace[idx])); exit(1); } - fsh2.sr = (fsh2.sr & 0xbff) | (sh2->sr & ~0xbff); + fsh2.sr = (fsh2.sr & 0x3ff) | (sh2->sr & ~0x3ff); fsh2.is_slave = idx; if (memcmp(&fsh2, sh2, offsetof(SH2, read8_map)) || 0)//memcmp(&fsh2.pdb_io_csum, &sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum))) @@ -3250,6 +3250,7 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca } static void *dr_get_pc_base(u32 pc, SH2 *sh2); +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { @@ -3319,6 +3320,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) block = dr_find_inactive_block(tcache_id, crc, base_pc, end_pc - base_pc, base_literals, end_literals - base_literals); +#if (DRC_DEBUG & (256|512)) + // remove any (partial) old blocks which might get in the way, to make sure + // the same branch targets are used in the recording/playback code. Not needed + // normally since the SH2 code wasn't overwritten and should be the same. + sh2_smc_rm_blocks(base_pc, end_pc - base_pc, tcache_id, 0); +#endif + if (block) { dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 's' : 'm', base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); @@ -3539,7 +3547,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // if exiting a pinned loop pinned regs must be written back to ctx // since they are reloaded in the loop entry code emith_cmp_r_imm(sr, 0); - EMITH_JMP_START(DCOND_GT); + EMITH_JMP_START(DCOND_GE); rcache_save_pinned(); if (blx_target_count < ARRAY_SIZE(blx_targets)) { @@ -3554,7 +3562,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_jump(sh2_drc_exit); rcache_free_tmp(tmp); } - EMITH_JMP_END(DCOND_GT); + EMITH_JMP_END(DCOND_GE); } else #endif { @@ -3568,10 +3576,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // blx table full, must inline exit code tmp = rcache_get_tmp_arg(0); emith_cmp_r_imm(sr, 0); - EMITH_SJMP_START(DCOND_GT); - emith_move_r_imm_c(DCOND_LE, tmp, pc); - emith_jump_cond(DCOND_LE, sh2_drc_exit); - EMITH_SJMP_END(DCOND_GT); + EMITH_SJMP_START(DCOND_GE); + emith_move_r_imm_c(DCOND_LT, tmp, pc); + emith_jump_cond(DCOND_LT, sh2_drc_exit); + EMITH_SJMP_END(DCOND_GE); rcache_free_tmp(tmp); } } @@ -5627,7 +5635,7 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free) { struct block_list **blist, *entry, *next; u32 mask = RAM_SIZE(tcache_id) - 1; @@ -5635,40 +5643,43 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) u32 start_addr, end_addr; u32 start_lit, end_lit; struct block_desc *block; -#if (DRC_DEBUG & 2) - int removed = 0; -#endif + int removed = 0, rest; + u32 _a = a; // ignore cache-through a &= wtmask; - blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; - entry = *blist; - // go through the block list for this range - while (entry != NULL) { - next = entry->next; - block = entry->block; - start_addr = block->addr & wtmask; - end_addr = start_addr + block->size; - start_lit = block->addr_lit & wtmask; - end_lit = start_lit + block->size_lit; - // disable/delete block if it covers the modified address - if ((start_addr < a+len && a < end_addr) || - (start_lit < a+len && a < end_lit)) - { - dbg(2, "smc remove @%08x", a); - end_addr = (start_lit < a+len && block->size_lit ? a : 0); - dr_rm_block_entry(block, tcache_id, end_addr, 0); -#if (DRC_DEBUG & 2) - removed = 1; -#endif + do { + blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; + entry = *blist; + // go through the block list for this range + while (entry != NULL) { + next = entry->next; + block = entry->block; + start_addr = block->addr & wtmask; + end_addr = start_addr + block->size; + start_lit = block->addr_lit & wtmask; + end_lit = start_lit + block->size_lit; + // disable/delete block if it covers the modified address + if ((start_addr < a+len && a < end_addr) || + (start_lit < a+len && a < end_lit)) + { + dbg(2, "smc remove @%08x", a); + end_addr = (start_lit < a+len && block->size_lit ? a : 0); + dr_rm_block_entry(block, tcache_id, end_addr, free); + removed = 1; + } + entry = next; } - entry = next; + rest = INVAL_PAGE_SIZE - (a & (INVAL_PAGE_SIZE-1)); + a += rest, len -= rest; + } while (len > 0); + + if (!removed && len <= 4) { + dbg(2, "rm_blocks called @%08x, no work?", _a); + return; } -#if (DRC_DEBUG & 2) - if (!removed) - dbg(2, "rm_blocks called @%08x, no work?", a); -#endif + #if BRANCH_CACHE if (tcache_id) memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); @@ -5691,12 +5702,12 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) void sh2_drc_wcheck_ram(u32 a, unsigned len, SH2 *sh2) { - sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT); + sh2_smc_rm_blocks(a, len, 0, 0); } void sh2_drc_wcheck_da(u32 a, unsigned len, SH2 *sh2) { - sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); + sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, 0); } int sh2_execute_drc(SH2 *sh2c, int cycles) diff --git a/platform/common/disarm.c b/platform/common/disarm.c index 24992206..90d0b80d 100644 --- a/platform/common/disarm.c +++ b/platform/common/disarm.c @@ -207,6 +207,15 @@ static int data_processing(unsigned int pc, unsigned int insn, char *buf, size_t snprintf(buf, buf_len, "%s%s%s %s,%s,%s%s", name, condition(insn), s, register_name(rd), register_name(rn), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); } } + else if ((insn & 0x0fb00000) == 0x03000000) + { + unsigned int imm; + char *half = (insn & 0x00400000) ? "t" : "w"; + + imm = (insn & 0x00000fff) | ((insn & 0x000f0000) >> 4); + + snprintf(buf, buf_len, "mov%s%s %s%s", half, condition(insn), register_name(rd), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } else { unsigned int imm; -- 2.39.2