From 7da5c7ad699ce52d0ef8361709b420751f4e42e0 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 16 Oct 2023 21:00:32 +0300 Subject: [PATCH] drc: implement cycle reload on read ... but decided to not enable it yet (or ever?) --- libpcsxcore/new_dynarec/assem_arm.c | 24 +++++-- libpcsxcore/new_dynarec/assem_arm64.c | 65 +++++++++++++------ libpcsxcore/new_dynarec/linkage_arm.S | 43 +++++++----- libpcsxcore/new_dynarec/linkage_arm64.S | 17 ++--- libpcsxcore/new_dynarec/patches/trace_drc_chk | 41 ++++++------ 5 files changed, 121 insertions(+), 69 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index bdb81b4d..70798eff 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1734,6 +1734,14 @@ static void do_readstub(int n) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); +#if 0 + if (type == LOADW_STUB) { + // new cycle_count returned in r2 + emit_addimm(2, -(int)stubs[n].d, cc<0?2:cc); + if (cc < 0) + emit_storereg(CCREG, 2); + } +#endif if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { mov_loadtype_adj(type,0,rt); } @@ -1804,6 +1812,14 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_far_call(handler); +#if 0 + if (type == LOADW_STUB) { + // new cycle_count returned in r2 + emit_addimm(2, -adj, cc<0?2:cc); + if (cc < 0) + emit_storereg(CCREG, 2); + } +#endif if(rt>=0&&dops[i].rt1!=0) { switch(type) { case LOADB_STUB: emit_signextend8(0,rt); break; @@ -1887,9 +1903,9 @@ static void do_writestub(int n) if(cc<0) emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); - // returns new cycle_count emit_far_call(handler); - emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); + // new cycle_count returned in r2 + emit_addimm(2,-(int)stubs[n].d,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); if(restore_jump) @@ -1927,9 +1943,9 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, emit_loadreg(CCREG,2); emit_addimm(cc<0?2:cc,adj,2); emit_movimm((u_int)handler,3); - // returns new cycle_count emit_far_call(jump_handler_write_h); - emit_addimm(0,-adj,cc<0?2:cc); + // new cycle_count returned in r2 + emit_addimm(2,-adj,cc<0?2:cc); if(cc<0) emit_storereg(CCREG,2); restore_regs(reglist); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 97e1fb14..bad2854c 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -1465,6 +1465,7 @@ static void do_readstub(int n) int i = stubs[n].a; int rs = stubs[n].b; const struct regstat *i_regs = (void *)stubs[n].c; + int adj = (int)stubs[n].d; u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; @@ -1527,12 +1528,22 @@ static void do_readstub(int n) handler=jump_handler_read32; assert(handler); pass_args64(rs,temp2); - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); + int cc, cc_use; + cc = cc_use = get_reg(i_regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); + emit_far_call(handler); - // (no cycle reload after read) + +#if 0 + // cycle reload for read32 only (value in w2 both in and out) + if (type == LOADW_STUB) { + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + } +#endif if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } @@ -1551,7 +1562,8 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, u_int is_dynamic=0; uintptr_t host_addr = 0; void *handler; - int cc=get_reg(regmap,CCREG); + int cc, cc_use; + cc = cc_use = get_reg(regmap, CCREG); //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt)) // return; handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr); @@ -1588,9 +1600,9 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_movimm(addr,0); else if(ra!=0) emit_mov(ra,0); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,adj,2); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl); @@ -1606,7 +1618,16 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_far_call(handler); - // (no cycle reload after read) +#if 0 + // cycle reload for read32 only (value in w2 both in and out) + if (type == LOADW_STUB) { + if (!is_dynamic) + emit_far_call(do_memhandler_post); + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + } +#endif if(rt>=0&&dops[i].rt1!=0) loadstore_extend(type, 0, rt); restore_regs(reglist); @@ -1620,6 +1641,7 @@ static void do_writestub(int n) int i=stubs[n].a; int rs=stubs[n].b; struct regstat *i_regs=(struct regstat *)stubs[n].c; + int adj = (int)stubs[n].d; u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; @@ -1687,16 +1709,19 @@ static void do_writestub(int n) emit_mov64(temp2,3); host_tempreg_release(); } - int cc=get_reg(i_regmap,CCREG); - if(cc<0) - emit_loadreg(CCREG,2); - emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); - // returns new cycle_count + int cc, cc_use; + cc = cc_use = get_reg(i_regmap, CCREG); + if (cc < 0) + emit_loadreg(CCREG, (cc_use = 2)); + emit_addimm(cc_use, adj, 2); + emit_far_call(handler); - emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc); - if(cc<0) - emit_storereg(CCREG,2); - if(restore_jump) + + // new cycle_count returned in x2 + emit_addimm(2, -adj, cc_use); + if (cc < 0) + emit_storereg(CCREG, cc_use); + if (restore_jump) set_jump_target(restore_jump, out); restore_regs(reglist); emit_jmp(stubs[n].retaddr); @@ -1736,7 +1761,7 @@ static void inline_writestub(enum stub_type type, int i, u_int addr, emit_far_call(do_memhandler_pre); emit_far_call(handler); emit_far_call(do_memhandler_post); - emit_addimm(0, -adj, cc_use); + emit_addimm(2, -adj, cc_use); if (cc < 0) emit_storereg(CCREG, cc_use); restore_regs(reglist); diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index baac1765..2bcf6654 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -419,15 +419,23 @@ FUNCTION(new_dyna_start): /* --------------------------------------- */ -.align 2 +.macro memhandler_post + /* r2 = cycles_out, r3 = tmp */ + ldr r3, [fp, #LO_next_interupt] + ldr r2, [fp, #LO_cycle] @ memhandlers can modify cc, like dma + str r3, [fp, #LO_last_count] + sub r2, r2, r3 +.endm + +.align 2 -.macro pcsx_read_mem readop tab_shift +.macro pcsx_read_mem_part readop tab_shift /* r0 = address, r1 = handler_tab, r2 = cycles */ lsl r3, r0, #20 lsr r3, #(20+\tab_shift) ldr r12, [fp, #LO_last_count] ldr r1, [r1, r3, lsl #2] - add r2, r2, r12 + add r12, r2, r12 lsls r1, #1 .if \tab_shift == 1 lsl r3, #1 @@ -436,28 +444,30 @@ FUNCTION(new_dyna_start): \readop r0, [r1, r3, lsl #\tab_shift] .endif movcc pc, lr - str r2, [fp, #LO_cycle] - bx r1 + mov r2, r12 + str r12, [fp, #LO_cycle] .endm FUNCTION(jump_handler_read8): add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part - pcsx_read_mem ldrbcc, 0 + pcsx_read_mem_part ldrbcc, 0 + bx r1 @ addr, unused, cycles FUNCTION(jump_handler_read16): add r1, #0x1000/4*4 @ shift to r16 part - pcsx_read_mem ldrhcc, 1 + pcsx_read_mem_part ldrhcc, 1 + bx r1 @ addr, unused, cycles FUNCTION(jump_handler_read32): - pcsx_read_mem ldrcc, 2 - - -.macro memhandler_post - ldr r0, [fp, #LO_next_interupt] - ldr r2, [fp, #LO_cycle] @ memhandlers can modify cc, like dma - str r0, [fp, #LO_last_count] - sub r0, r2, r0 -.endm + pcsx_read_mem_part ldrcc, 2 + bx r1 @ addr, unused, cycles +#if 0 + str lr, [fp, #LO_saved_lr] + blx r1 + ldr lr, [fp, #LO_saved_lr] + memhandler_post + bx lr +#endif .macro pcsx_write_mem wrtop tab_shift /* r0 = address, r1 = data, r2 = cycles, r3 = handler_tab */ @@ -466,7 +476,6 @@ FUNCTION(jump_handler_read32): ldr r3, [r3, r12, lsl #2] str r0, [fp, #LO_address] @ some handlers still need it.. lsls r3, #1 - mov r0, r2 @ cycle return in case of direct store .if \tab_shift == 1 lsl r12, #1 \wrtop r1, [r3, r12] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 3519dffb..fa8a4117 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -227,10 +227,11 @@ FUNCTION(new_dyna_leave): .endm .macro memhandler_post - ldr w0, [rFP, #LO_next_interupt] + /* w2 = cycles_out, x3 = tmp */ + ldr w3, [rFP, #LO_next_interupt] ldr w2, [rFP, #LO_cycle] // memhandlers can modify cc, like dma - str w0, [rFP, #LO_last_count] - sub w0, w2, w0 + str w3, [rFP, #LO_last_count] + sub w2, w2, w3 .endm FUNCTION(do_memhandler_pre): @@ -258,17 +259,18 @@ FUNCTION(do_memhandler_post): FUNCTION(jump_handler_read8): add x1, x1, #0x1000/4*8 + 0x1000/2*8 /* shift to r8 part */ pcsx_read_mem ldrb, 0 - b handler_read_end + ldp xzr, x30, [sp], #16 + ret FUNCTION(jump_handler_read16): add x1, x1, #0x1000/4*8 /* shift to r16 part */ pcsx_read_mem ldrh, 1 - b handler_read_end + ldp xzr, x30, [sp], #16 + ret FUNCTION(jump_handler_read32): pcsx_read_mem ldr, 2 - -handler_read_end: + /* memhandler_post */ ldp xzr, x30, [sp], #16 ret @@ -278,7 +280,6 @@ handler_read_end: ldr x3, [x3, w4, uxtw #3] adds x3, x3, x3 bcs 0f - mov w0, w2 /* cycle return */ \wrtop w1, [x3, w4, uxtw #\tab_shift] ret 0: diff --git a/libpcsxcore/new_dynarec/patches/trace_drc_chk b/libpcsxcore/new_dynarec/patches/trace_drc_chk index 5dab3175..da861698 100644 --- a/libpcsxcore/new_dynarec/patches/trace_drc_chk +++ b/libpcsxcore/new_dynarec/patches/trace_drc_chk @@ -1,8 +1,8 @@ diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c -index ede1f93c..1c8965f0 100644 +index 74f32ee3..4eec8a83 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c -@@ -324,7 +324,7 @@ static struct compile_info +@@ -325,7 +325,7 @@ static struct compile_info int new_dynarec_hacks_old; int new_dynarec_did_compile; @@ -11,7 +11,7 @@ index ede1f93c..1c8965f0 100644 extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG) extern int last_count; // last absolute target, often = next_interupt -@@ -602,6 +602,7 @@ static int cycle_multiplier_active; +@@ -603,6 +603,7 @@ static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) { @@ -19,7 +19,7 @@ index ede1f93c..1c8965f0 100644 int m = cycle_multiplier_active; int s = (x >> 31) | 1; return (x * m + s * 50) / 100; -@@ -776,6 +777,9 @@ static noinline u_int generate_exception(u_int pc) +@@ -808,6 +809,9 @@ static noinline u_int generate_exception(u_int pc) // This is called from the recompiled JR/JALR instructions static void noinline *get_addr(u_int vaddr, int can_compile) { @@ -29,7 +29,7 @@ index ede1f93c..1c8965f0 100644 u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); void *found_clean = NULL; -@@ -7157,7 +7161,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) +@@ -7213,7 +7217,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) // R0 is always unneeded u|=1; // Save it @@ -38,7 +38,7 @@ index ede1f93c..1c8965f0 100644 gte_unneeded[i]=gte_u; /* printf("ur (%d,%d) %x: ",istart,iend,start+i*4); -@@ -8299,6 +8303,7 @@ static noinline void pass5a_preallocate1(void) +@@ -8355,6 +8359,7 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr; @@ -46,7 +46,7 @@ index ede1f93c..1c8965f0 100644 for(i=0;isubCycleStep >= 0x10000); regs->subCycle += regs->subCycleStep; @@ -124,11 +124,12 @@ index 5756bee5..4fe98b1b 100644 regs->subCycle &= 0xffff; } -@@ -1344,8 +1344,14 @@ static void intShutdown() { +@@ -1348,8 +1348,15 @@ static void intShutdown() { // single step (may do several ops in case of a branch or load delay) // called by asm/dynarec void execI(psxRegisters *regs) { + extern int last_count; ++ extern u32 next_interupt; + void do_insn_cmp(void); + printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, next_interupt); + last_count = 0; -- 2.39.5