From bdbf4466b34ccfdc6ae7f1145937023f0cf2ef6d Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 10 Aug 2022 22:39:41 +0300 Subject: [PATCH] drc: rework smc checks again the way it was done before wasn't good enough for Mega Man Legends 2 at least --- libpcsxcore/new_dynarec/assem_arm.c | 20 ++- libpcsxcore/new_dynarec/assem_arm64.c | 21 +-- libpcsxcore/new_dynarec/linkage_arm.S | 4 +- libpcsxcore/new_dynarec/new_dynarec.c | 229 ++++++++++++++++---------- 4 files changed, 170 insertions(+), 104 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 6570f1e8..95007dfa 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -27,8 +27,6 @@ #include "pcnt.h" #include "arm_features.h" -#define unused __attribute__((unused)) - #ifdef DRC_DBG #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" @@ -1033,6 +1031,15 @@ static void emit_jcc(const void *a_) output_w32(0x3a000000|offset); } +static void *emit_cbz(int rs, const void *a) +{ + void *ret; + emit_test(rs, rs); + ret = out; + emit_jeq(a); + return ret; +} + static unused void emit_callreg(u_int r) { assert(r<15); @@ -1392,13 +1399,10 @@ static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) } // special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm) +static void emit_ldrb_indexedsr12_reg(int base, int r, int rt) { - assert(imm<128&&imm>=0); - assert(r>=0&&r<16); - assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]); - output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620); - emit_cmpimm(HOST_TEMPREG,imm); + assem_debug("ldrb %s,%s,%s lsr #12\n",regname[rt],regname[base],regname[r]); + output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620); } static void emit_callne(int a) diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index b7d82c12..271bee58 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,8 +23,6 @@ #include "pcnt.h" #include "arm_features.h" -#define unused __attribute__((unused)) - void do_memhandler_pre(); void do_memhandler_post(); @@ -619,6 +617,10 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) { + if (imm == 0) { + emit_mov(rs, rt); + return; + } emit_addimm_s(0, 0, rs, imm, rt); } @@ -988,9 +990,11 @@ static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r) output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r)); } -static unused void emit_cbz(const void *a, u_int r) +static void *emit_cbz(u_int r, const void *a) { + void *ret = out; emit_cb(0, 0, a, r); + return ret; } static void emit_jmpreg(u_int r) @@ -1198,14 +1202,11 @@ static void emit_clz(u_int rs, u_int rt) } // special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm) +static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt) { - host_tempreg_acquire(); - emit_shrimm(r, 12, HOST_TEMPREG); - assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]); - output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); - emit_cmpimm(HOST_TEMPREG, imm); - host_tempreg_release(); + emit_shrimm(r, 12, rt); + assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]); + output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt)); } // special for loadlr_assemble, rs2 is destroyed diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 513911ca..7e0db2d7 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -30,7 +30,7 @@ #define ndrc_try_restore_block ESYM(ndrc_try_restore_block) #define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) #define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param) -#define ndrc_invalidate_addr ESYM(ndrc_invalidate_addr) +#define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one) #define gen_interupt ESYM(gen_interupt) #define gteCheckStallRaw ESYM(gteCheckStallRaw) #define psxException ESYM(psxException) @@ -401,7 +401,7 @@ invalidate_addr_call: ldr lr, [fp, #LO_inv_code_end] cmp r0, r12 cmpcs lr, r0 - blcc ndrc_invalidate_addr + blcc ndrc_write_invalidate_one ldmia fp, {r0, r1, r2, r3, EXTRA_UNSAVED_REGS r12, pc} .size invalidate_addr_call, .-invalidate_addr_call diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 0fafc60a..b9e7c144 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -41,6 +41,7 @@ static Jit g_jit; #include "emu_if.h" // emulator interface #include "arm_features.h" +#define unused __attribute__((unused)) #ifdef __clang__ #define noinline __attribute__((noinline)) #else @@ -58,6 +59,7 @@ static Jit g_jit; //#define DISASM //#define ASSEM_PRINT +//#define INV_DEBUG_W //#define STAT_PRINT #ifdef ASSEM_PRINT @@ -163,7 +165,7 @@ struct regstat u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true u_int isconst; // ... but isconst is false when r2 is known u_int loadedconst; // host regs that have constants loaded - u_int waswritten; // MIPS regs that were used as store base before + //u_int waswritten; // MIPS regs that were used as store base before }; struct ht_entry @@ -397,8 +399,9 @@ void new_dyna_leave(); void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile); void *ndrc_get_addr_ht(u_int vaddr); -void ndrc_invalidate_addr(u_int addr); void ndrc_add_jump_out(u_int vaddr, void *src); +void ndrc_write_invalidate_one(u_int addr); +static void ndrc_write_invalidate_many(u_int addr, u_int end); static int new_recompile_block(u_int addr); static void invalidate_block(struct block_info *block); @@ -687,6 +690,28 @@ static int doesnt_expire_soon(u_char *tcaddr) return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; } +static unused void check_for_block_changes(u_int start, u_int end) +{ + u_int start_page = get_page_prev(start); + u_int end_page = get_page(end - 1); + u_int page; + + for (page = start_page; page <= end_page; page++) { + struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (block->is_dirty) + continue; + if (memcmp(block->source, block->copy, block->len)) { + printf("bad block %08x-%08x %016llx %016llx @%08x\n", + block->start, block->start + block->len, + *(long long *)block->source, *(long long *)block->copy, psxRegs.pc); + fflush(stdout); + abort(); + } + } + } +} + static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) { void *found_clean = NULL; @@ -770,6 +795,7 @@ static void noinline *get_addr(u_int vaddr, int can_compile) // Look up address in hash table first void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) { + //check_for_block_changes(vaddr, vaddr + MAXBLOCK); const struct ht_entry *ht_bin = hash_table_get(vaddr); stat_inc(stat_ht_lookups); if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0]; @@ -1165,7 +1191,8 @@ static const struct { FUNCNAME(jump_handler_write8), FUNCNAME(jump_handler_write16), FUNCNAME(jump_handler_write32), - FUNCNAME(ndrc_invalidate_addr), + FUNCNAME(ndrc_write_invalidate_one), + FUNCNAME(ndrc_write_invalidate_many), FUNCNAME(jump_to_new_pc), FUNCNAME(jump_break), FUNCNAME(jump_break_ds), @@ -1332,7 +1359,7 @@ static int blocks_remove_matching_addrs(struct block_info **head, int hit = 0; while (*head) { if ((((*head)->tc_offs ^ base_offs) >> shift) == 0) { - inv_debug("EXP: rm block %08x (tc_offs %zx)\n", (*head)->start, (*head)->tc_offs); + inv_debug("EXP: rm block %08x (tc_offs %x)\n", (*head)->start, (*head)->tc_offs); invalidate_block(*head); next = (*head)->next; free(*head); @@ -1393,7 +1420,7 @@ static void unlink_jumps_tc_range(struct jump_info *ji, u_int base_offs, int shi continue; } - inv_debug("EXP: rm link to %08x (tc_offs %zx)\n", ji->e[i].target_vaddr, tc_offs); + inv_debug("EXP: rm link to %08x (tc_offs %x)\n", ji->e[i].target_vaddr, tc_offs); stat_dec(stat_links); ji->count--; if (i < ji->count) { @@ -1428,7 +1455,7 @@ static int invalidate_range(u_int start, u_int end, int hit = 0; // additional area without code (to supplement invalid_code[]), [start, end) - // avoids excessive ndrc_invalidate_addr() calls + // avoids excessive ndrc_write_invalidate*() calls inv_start = start_m & ~0xfff; inv_end = end_m | 0xfff; @@ -1487,16 +1514,28 @@ void new_dynarec_invalidate_range(unsigned int start, unsigned int end) invalidate_range(start, end, NULL, NULL); } -void ndrc_invalidate_addr(u_int addr) +static void ndrc_write_invalidate_many(u_int start, u_int end) { // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } - int ret = invalidate_range(addr, addr + 4, &inv_code_start, &inv_code_end); + int ret = invalidate_range(start, end, &inv_code_start, &inv_code_end); +#ifdef INV_DEBUG_W + int invc = invalid_code[start >> 12]; + u_int len = end - start; if (ret) - inv_debug("INV ADDR: %08x hit %d blocks\n", addr, ret); + printf("INV ADDR: %08x/%02x hit %d blocks\n", start, len, ret); else - inv_debug("INV ADDR: %08x miss, inv %08x-%08x\n", addr, inv_code_start, inv_code_end); + printf("INV ADDR: %08x/%02x miss, inv %08x-%08x invc %d->%d\n", start, len, + inv_code_start, inv_code_end, invc, invalid_code[start >> 12]); + check_for_block_changes(start, end); +#endif stat_inc(stat_inv_addr_calls); + (void)ret; +} + +void ndrc_write_invalidate_one(u_int addr) +{ + ndrc_write_invalidate_many(addr, addr + 4); } // This is called when loading a save state. @@ -1521,26 +1560,6 @@ void new_dynarec_invalidate_all_pages(void) do_clear_cache(); } -static void do_invstub(int n) -{ - literal_pool(20); - u_int reglist = stubs[n].a; - set_jump_target(stubs[n].addr, out); - save_regs(reglist); - if (stubs[n].b != 0) - emit_mov(stubs[n].b, 0); - emit_readword(&inv_code_start, 1); - emit_readword(&inv_code_end, 2); - emit_cmp(0, 1); - emit_cmpcs(2, 0); - void *jaddr = out; - emit_jc(0); - emit_far_call(ndrc_invalidate_addr); - set_jump_target(jaddr, out); - restore_regs(reglist); - emit_jmp(stubs[n].retaddr); // return address -} - // Add an entry to jump_out after making a link // src should point to code by emit_extjump() void ndrc_add_jump_out(u_int vaddr, void *src) @@ -3147,6 +3166,89 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) } #endif +static void do_invstub(int n) +{ + literal_pool(20); + assem_debug("do_invstub\n"); + u_int reglist = stubs[n].a; + u_int addrr = stubs[n].b; + int ofs_start = stubs[n].c; + int ofs_end = stubs[n].d; + int len = ofs_end - ofs_start; + u_int rightr = 0; + + set_jump_target(stubs[n].addr, out); + save_regs(reglist); + emit_readword(&inv_code_start, 2); + emit_readword(&inv_code_end, 3); + if (addrr != 0 || ofs_start != 0) + emit_addimm(addrr, ofs_start, 0); + if (len != 0) + emit_addimm(0, len + 4, (rightr = 1)); + emit_cmp(0, 2); + emit_cmpcs(3, rightr); + void *jaddr = out; + emit_jc(0); + void *func = (len != 0) + ? (void *)ndrc_write_invalidate_many + : (void *)ndrc_write_invalidate_one; + emit_far_call(func); + set_jump_target(jaddr, out); + restore_regs(reglist); + emit_jmp(stubs[n].retaddr); +} + +static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglist, int addr) +{ + if (HACK_ENABLED(NDHACK_NO_SMC_CHECK)) + return; + // this can't be used any more since we started to check exact + // block boundaries in invalidate_range() + //if (i_regs->waswritten & (1<= 0; j--) { + if (!dops[j].is_store || dops[j].rs1 != dops[i].rs1 + || abs(imm[j] - imm[j+1]) > imm_maxdiff) + break; + count++; + if (imm_min > imm[j]) + imm_min = imm[j]; + if (imm_max < imm[j]) + imm_max = imm[j]; + } +#if defined(HOST_IMM8) + int ir = get_reg(i_regs->regmap, INVCP); + assert(ir >= 0); + host_tempreg_acquire(); + emit_ldrb_indexedsr12_reg(ir, addr, HOST_TEMPREG); +#else + emit_cmpmem_indexedsr12_imm(invalid_code, addr, 1); + #error not handled +#endif +#ifdef INVALIDATE_USE_COND_CALL + if (count == 1) { + emit_cmpimm(HOST_TEMPREG, 1); + emit_callne(invalidate_addr_reg[addr]); + host_tempreg_release(); + return; + } +#endif + void *jaddr = emit_cbz(HOST_TEMPREG, 0); + host_tempreg_release(); + imm_min -= imm[i]; + imm_max -= imm[i]; + add_stub(INVCODE_STUB, jaddr, out, reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,addr,1); - #else - emit_cmpmem_indexedsr12_imm(invalid_code,addr,1); - #endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[addr]); - #else - void *jaddr2 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,temp,1); - #else - emit_cmpmem_indexedsr12_imm(invalid_code,temp,1); - #endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[temp]); - #else - void *jaddr2 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,ar,1); -#else - emit_cmpmem_indexedsr12_imm(invalid_code,ar,1); -#endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[ar]); - #else - void *jaddr3 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<0&&(dops[i-1].itype==STORE||dops[i-1].itype==STORELR||(dops[i-1].itype==C2LS&&dops[i-1].opcode==0x3a))&&(u_int)imm[i-1]<0x800) current.waswritten|=1<=0x800) current.waswritten&=~(1<0) @@ -7810,7 +7871,7 @@ static noinline void pass3_register_alloc(u_int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; - regs[i].waswritten=current.waswritten; + //regs[i].waswritten=current.waswritten; } } @@ -8863,8 +8924,8 @@ static noinline void pass10_expire_blocks(void) u_int block_i = expirep / step & (PAGE_COUNT - 1); u_int phase = (expirep >> (base_shift - 1)) & 1u; if (!(expirep & (MAX_OUTPUT_BLOCK_SIZE / 2 - 1))) { - inv_debug("EXP: base_offs %x/%x phase %u\n", base_offs, - out - ndrc->translation_cache, phase); + inv_debug("EXP: base_offs %x/%lx phase %u\n", base_offs, + (long)(out - ndrc->translation_cache), phase); } if (!phase) { -- 2.39.5