From: notaz Date: Thu, 23 Jan 2025 22:19:14 +0000 (+0200) Subject: drc: implement block linking on platforms that lacked it X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=555d3b51cd4f189006adef2d493fe0dde5c44393;p=pcsx_rearmed.git drc: implement block linking on platforms that lacked it ... and likely break some of those platforms that I can't test :( --- diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index b0810462..5caa536e 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -28,6 +28,10 @@ #include "pcnt.h" #include "arm_features.h" +#ifdef TC_WRITE_OFFSET +#error "not implemented" +#endif + #ifdef DRC_DBG #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" @@ -103,11 +107,19 @@ const void *invalidate_addr_reg[16] = { /* Linker */ +static void set_jump_target_far1(u_int *insn, void *target) +{ + u_int ni = *insn & 0xff000000; + ni |= (((u_int)target - (u_int)insn - 8u) << 6) >> 8; + assert((ni & 0x0e000000) == 0x0a000000); + *insn = ni; +} + static void set_jump_target(void *addr, void *target_) { - u_int target = (u_int)target_; - u_char *ptr = addr; - u_int *ptr2=(u_int *)ptr; + const u_int target = (u_int)target_; + const u_char *ptr = addr; + u_int *ptr2 = (u_int *)ptr; if(ptr[3]==0xe2) { assert((target-(u_int)ptr2-8)<1024); assert(((uintptr_t)addr&3)==0); @@ -130,8 +142,7 @@ static void set_jump_target(void *addr, void *target_) else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8); } else { - assert((ptr[3]&0x0e)==0xa); - *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); + set_jump_target_far1(ptr2, target_); } } @@ -190,20 +201,6 @@ static void *find_extjump_insn(void *stub) return *l_ptr; } -// find where external branch is liked to using addr of it's stub: -// get address that insn one after stub loads (dyna_linker arg1), -// treat it as a pointer to branch insn, -// return addr where that branch jumps to -#if 0 -static void *get_pointer(void *stub) -{ - //printf("get_pointer(%x)\n",(int)stub); - int *i_ptr=find_extjump_insn(stub); - assert((*i_ptr&0x0f000000)==0x0a000000); // b - return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8; -} -#endif - // Allocate a specific ARM register. static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) { @@ -1586,7 +1583,7 @@ static void literal_pool_jumpover(int n) set_jump_target(jaddr, out); } -// parsed by get_pointer, find_extjump_insn +// parsed by find_extjump_insn, check_extjump2 static void emit_extjump(u_char *addr, u_int target) { u_char *ptr=(u_char *)addr; diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index b3558767..74b1657f 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -36,6 +36,7 @@ extern char *invc_ptr; +// note: max due to branch encoding: arm 32M, arm64 128M #define TARGET_SIZE_2 24 // 2^24 = 16 megabytes struct tramp_insns diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 9f2f66af..8f174fde 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -24,14 +24,24 @@ #include "arm_features.h" /* Linker */ +static void set_jump_target_far1(u_int *insn_, void *target) +{ + u_int *insn = NDRC_WRITE_OFFSET(insn_); + u_int in = *insn & 0xfc000000; + intptr_t offset = (u_char *)target - (u_char *)insn_; + assert(in == 0x14000000); + assert(-134217728 <= offset && offset < 134217728); + in |= (offset >> 2) & 0x3ffffff; + *insn = in; +} + static void set_jump_target(void *addr, void *target) { u_int *ptr = NDRC_WRITE_OFFSET(addr); intptr_t offset = (u_char *)target - (u_char *)addr; if ((*ptr&0xFC000000) == 0x14000000) { // b - assert(offset>=-134217728LL&&offset<134217728LL); - *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff); + set_jump_target_far1(addr, target); } else if ((*ptr&0xff000000) == 0x54000000 // b.cond || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz @@ -61,24 +71,6 @@ static void *find_extjump_insn(void *stub) return ptr + offset / 4; } -#if 0 -// find where external branch is liked to using addr of it's stub: -// get address that the stub loads (dyna_linker arg1), -// treat it as a pointer to branch insn, -// return addr where that branch jumps to -static void *get_pointer(void *stub) -{ - int *i_ptr = find_extjump_insn(stub); - if ((*i_ptr&0xfc000000) == 0x14000000) // b - return i_ptr + ((signed int)(*i_ptr<<6)>>6); - if ((*i_ptr&0xff000000) == 0x54000000 // b.cond - || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz - return i_ptr + ((signed int)(*i_ptr<<8)>>13); - assert(0); - return NULL; -} -#endif - // Allocate a specific ARM register. static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) { @@ -1365,7 +1357,7 @@ static void literal_pool_jumpover(int n) { } -// parsed by get_pointer, find_extjump_insn +// parsed by find_extjump_insn, check_extjump2 static void emit_extjump(u_char *addr, u_int target) { assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond diff --git a/libpcsxcore/new_dynarec/assem_arm64.h b/libpcsxcore/new_dynarec/assem_arm64.h index f8ee042f..948b91d8 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.h +++ b/libpcsxcore/new_dynarec/assem_arm64.h @@ -1,8 +1,9 @@ #define HOST_IMM8 1 /* calling convention: - r0 -r17: caller-save - r19-r29: callee-save */ + x0 -x17: caller-save + x18 : caller-save (platform reg) + x19-x29: callee-save */ #define HOST_REGS 29 #define EXCLUDE_REG -1 diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 9ac9e05d..7976cb7d 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -26,7 +26,7 @@ #ifdef __MACH__ #define dynarec_local ESYM(dynarec_local) -#define ndrc_add_jump_out ESYM(ndrc_add_jump_out) +#define ndrc_patch_link ESYM(ndrc_patch_link) #define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) #define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param) #define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one) @@ -148,15 +148,15 @@ DRC_VAR(mini_ht, 256) FUNCTION(dyna_linker): /* r0 = virtual target address */ /* r1 = pointer to an instruction to patch */ -#ifndef NO_WRITE_EXEC +#if 1 ldr r7, [r1] mov r4, r0 add r6, r7, #2 mov r5, r1 lsl r6, r6, #8 /* must not compile - that might expire the caller block */ - ldr r0, [fp, #LO_hash_table_ptr] - mov r1, r4 + ldr r0, [fp, #LO_hash_table_ptr] + mov r1, r4 mov r2, #0 /* ndrc_compile_mode=ndrc_cm_no_compile */ bl ndrc_get_addr_ht_param @@ -166,22 +166,15 @@ FUNCTION(dyna_linker): teq r0, r6 bxeq r0 /* Stale i-cache */ mov r0, r4 - mov r1, r6 - bl ndrc_add_jump_out - - sub r2, r8, r5 - and r1, r7, #0xff000000 - lsl r2, r2, #6 - sub r1, r1, #2 - add r1, r1, r2, lsr #8 - str r1, [r5] + mov r1, r5 + mov r2, r6 + mov r3, r8 + bl ndrc_patch_link bx r8 0: mov r0, r4 -#else - /* XXX: should be able to do better than this... */ #endif - ldr r1, [fp, #LO_hash_table_ptr] + ldr r1, [fp, #LO_hash_table_ptr] bl ndrc_get_addr_ht bx r0 .size dyna_linker, .-dyna_linker diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index 47aa39c7..730f9cac 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -26,7 +26,7 @@ #ifdef __MACH__ #define dynarec_local ESYM(dynarec_local) -#define ndrc_add_jump_out ESYM(ndrc_add_jump_out) +#define ndrc_patch_link ESYM(ndrc_patch_link) #define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht) #define gen_interupt ESYM(gen_interupt) #define psxException ESYM(psxException) @@ -90,8 +90,30 @@ DRC_VAR(mini_ht, 256) .align 2 FUNCTION(dyna_linker): - /* r0 = virtual target address */ - /* r1 = instruction to patch */ + /* w0 = virtual target address */ + /* x1 = instruction to patch */ +#if 1 + mov w19, w0 + mov x20, x1 + /* must not compile - that might expire the caller block */ + ldr x0, [rFP, #LO_hash_table_ptr] + mov w1, w19 + mov w2, #0 /* ndrc_compile_mode=ndrc_cm_no_compile */ + bl ndrc_get_addr_ht_param + cbz x0, 0f + + ldr w2, [x20] + mov x3, x0 + sbfiz x2, x2, 2, 26 + add x2, x2, x20 + mov x1, x20 + mov w0, w19 + mov x19, x3 + bl ndrc_patch_link + br x19 +0: + mov w0, w19 +#endif ldr x1, [rFP, #LO_hash_table_ptr] bl ndrc_get_addr_ht br x0 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index fda54348..437d1764 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -410,7 +410,6 @@ void jump_to_new_pc(); void new_dyna_leave(); void *ndrc_get_addr_ht(u_int vaddr, struct ht_entry *ht); -void ndrc_add_jump_out(u_int vaddr, void *src); void ndrc_write_invalidate_one(u_int addr); static void ndrc_write_invalidate_many(u_int addr, u_int end); @@ -1717,15 +1716,15 @@ void new_dynarec_invalidate_all_pages(void) } // Add an entry to jump_out after making a link -// src should point to code by emit_extjump() -void ndrc_add_jump_out(u_int vaddr, void *src) +// stub should point to stub code by emit_extjump() +static void ndrc_add_jump_out(u_int vaddr, void *stub) { - inv_debug("ndrc_add_jump_out: %p -> %x\n", src, vaddr); + inv_debug("ndrc_add_jump_out: %p -> %x\n", stub, vaddr); u_int page = get_page(vaddr); struct jump_info *ji; stat_inc(stat_links); - check_extjump2(src); + check_extjump2(stub); ji = jumps[page]; if (ji == NULL) { ji = malloc(sizeof(*ji) + sizeof(ji->e[0]) * 16); @@ -1738,10 +1737,30 @@ void ndrc_add_jump_out(u_int vaddr, void *src) } jumps[page] = ji; ji->e[ji->count].target_vaddr = vaddr; - ji->e[ji->count].stub = src; + ji->e[ji->count].stub = stub; ji->count++; } +void ndrc_patch_link(u_int vaddr, void *insn, void *stub, void *target) +{ + void *insn_end = (char *)insn + 4; + + //start_tcache_write(insn, insn_end); + mprotect_w_x(insn, insn_end, 0); + + assert(target != stub); + set_jump_target_far1(insn, target); + ndrc_add_jump_out(vaddr, stub); + +#if defined(__aarch64__) || defined(NO_WRITE_EXEC) + // arm64: no syscall concerns, dyna_linker lacks stale detection + // w^x: have to do costly permission switching anyway + new_dyna_clear_cache(NDRC_WRITE_OFFSET(insn), NDRC_WRITE_OFFSET(insn_end)); +#endif + //end_tcache_write(insn, insn_end); + mprotect_w_x(insn, insn_end, 1); +} + /* Register allocation */ static void alloc_set(struct regstat *cur, int reg, int hr) @@ -6287,11 +6306,11 @@ static noinline void new_dynarec_test(void) SysPrintf("(%p) testing if we can run recompiled code @%p...\n", new_dynarec_test, out); - ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { out = ndrc->translation_cache; beginning = start_block(); + ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty emit_movimm(DRC_TEST_VAL + i, 0); // test emit_ret(); literal_pool(0); @@ -6406,6 +6425,8 @@ void new_dynarec_init(void) void *mw = mmap(NULL, sizeof(*ndrc), PROT_READ | PROT_WRITE, (flags = MAP_SHARED), fd, 0); assert(mw != MAP_FAILED); + #endif + #if defined(NO_WRITE_EXEC) || defined(TC_WRITE_OFFSET) prot = PROT_READ | PROT_EXEC; #endif ndrc = mmap((void *)desired_addr, sizeof(*ndrc), prot, flags, fd, 0); @@ -6418,14 +6439,17 @@ void new_dynarec_init(void) #endif #endif #else - #ifndef NO_WRITE_EXEC ndrc = (struct ndrc_mem *)((size_t)(ndrc_bss + align) & ~align); + #ifndef NO_WRITE_EXEC // not all systems allow execute in data segment by default // size must be 4K aligned for 3DS? if (mprotect(ndrc, sizeof(*ndrc), PROT_READ | PROT_WRITE | PROT_EXEC) != 0) SysPrintf("mprotect(%p) failed: %s\n", ndrc, strerror(errno)); #endif + #ifdef TC_WRITE_OFFSET + #error "misconfiguration detected" + #endif #endif out = ndrc->translation_cache; new_dynarec_clear_full(); @@ -6473,17 +6497,17 @@ void new_dynarec_cleanup(void) static u_int *get_source_start(u_int addr, u_int *limit) { - if (addr < 0x00800000 - || (0x80000000 <= addr && addr < 0x80800000) - || (0xa0000000 <= addr && addr < 0xa0800000)) + if (addr < 0x00800000u + || (0x80000000u <= addr && addr < 0x80800000u) + || (0xa0000000u <= addr && addr < 0xa0800000u)) { // used for BIOS calls mostly? *limit = (addr & 0xa0600000) + 0x00200000; return (u_int *)(psxM + (addr & 0x1fffff)); } else if ( - /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/ - (0xbfc00000 <= addr && addr < 0xbfc80000)) + (0x9fc00000u <= addr && addr < 0x9fc80000u) || + (0xbfc00000u <= addr && addr < 0xbfc80000u)) { // BIOS. The multiplier should be much higher as it's uncached 8bit mem // XXX: disabled as this introduces differences from the interpreter diff --git a/libpcsxcore/new_dynarec/new_dynarec_config.h b/libpcsxcore/new_dynarec/new_dynarec_config.h index 9687aa97..0781e47d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec_config.h +++ b/libpcsxcore/new_dynarec/new_dynarec_config.h @@ -6,6 +6,12 @@ #define USE_MINI_HT 1 //#define REG_PREFETCH 1 +// options: +//#define NO_WRITE_EXEC 1 +//#define BASE_ADDR_DYNAMIC 1 +//#define TC_WRITE_OFFSET 1 +//#define NDRC_CACHE_FLUSH_ALL 1 + #if defined(__MACH__) || defined(HAVE_LIBNX) #define NO_WRITE_EXEC 1 #endif