From 0b1633d72a8854f7ee4f62f320ef0ecf8ff71ea1 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 11 Oct 2024 02:34:16 +0300 Subject: [PATCH] drc: try compiling on another thread --- .gitmodules | 3 + Makefile | 20 +- deps/libretro-common | 1 + frontend/libretro-rthreads.c | 11 + frontend/libretro-rthreads.h | 3 + frontend/libretro.c | 32 ++- frontend/libretro_core_options.h | 17 ++ frontend/main.c | 2 +- frontend/menu.c | 12 +- frontend/plugin_lib.c | 4 +- include/compiler_features.h | 4 +- libpcsxcore/cdrom-async.c | 3 +- libpcsxcore/database.c | 4 +- libpcsxcore/misc.c | 4 +- libpcsxcore/new_dynarec/assem_arm.c | 20 +- libpcsxcore/new_dynarec/assem_arm64.c | 20 +- libpcsxcore/new_dynarec/emu_if.c | 345 ++++++++++++++++++++---- libpcsxcore/new_dynarec/emu_if.h | 7 - libpcsxcore/new_dynarec/linkage_arm.S | 15 +- libpcsxcore/new_dynarec/linkage_arm64.S | 27 +- libpcsxcore/new_dynarec/new_dynarec.c | 131 +++++---- libpcsxcore/new_dynarec/new_dynarec.h | 45 +++- libpcsxcore/psxinterpreter.c | 11 + libpcsxcore/r3000a.h | 4 +- 24 files changed, 570 insertions(+), 175 deletions(-) create mode 160000 deps/libretro-common create mode 100644 frontend/libretro-rthreads.h diff --git a/.gitmodules b/.gitmodules index fa655497..d4665d30 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "deps/lightning"] path = deps/lightning url = https://github.com/pcercuei/gnu_lightning.git +[submodule "deps/libretro-common"] + path = deps/libretro-common + url = https://github.com/libretro/libretro-common.git diff --git a/Makefile b/Makefile index 1e965a69..0d899162 100644 --- a/Makefile +++ b/Makefile @@ -129,6 +129,12 @@ OBJS += libpcsxcore/new_dynarec/pcsxmem.o else $(error no dynarec support for architecture $(ARCH)) endif + ifeq "$(NDRC_THREAD)" "1" + libpcsxcore/new_dynarec/new_dynarec.o: CFLAGS += -DNDRC_THREAD + libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -DNDRC_THREAD + frontend/libretro.o: CFLAGS += -DNDRC_THREAD + USE_RTHREADS := 1 + endif else CFLAGS += -DDRC_DISABLE endif @@ -327,8 +333,8 @@ OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o CFLAGS += -DHAVE_CDROM endif ifeq "$(USE_ASYNC_CDROM)" "1" -OBJS += frontend/libretro-rthreads.o CFLAGS += -DUSE_ASYNC_CDROM +USE_RTHREADS := 1 endif ifeq "$(USE_LIBRETRO_VFS)" "1" OBJS += deps/libretro-common/compat/compat_posix_string.o @@ -341,12 +347,24 @@ CFLAGS += -DUSE_LIBRETRO_VFS endif OBJS += frontend/libretro.o CFLAGS += -DFRONTEND_SUPPORTS_RGB565 +CFLAGS += -DHAVE_LIBRETRO +INC_LIBRETRO_COMMON := 1 ifneq ($(DYNAREC),lightrec) ifeq ($(MMAP_WIN32),1) OBJS += libpcsxcore/memmap_win32.o endif endif +endif # $(PLATFORM) == "libretro" + +ifeq "$(USE_RTHREADS)" "1" +OBJS += frontend/libretro-rthreads.o +OBJS += deps/libretro-common/features/features_cpu.o +frontend/main.o: CFLAGS += -DHAVE_CPU_FEATURES +INC_LIBRETRO_COMMON := 1 +endif +ifeq "$(INC_LIBRETRO_COMMON)" "1" +CFLAGS += -Ideps/libretro-common/include endif ifeq "$(USE_PLUGIN_LIB)" "1" diff --git a/deps/libretro-common b/deps/libretro-common new file mode 160000 index 00000000..0abedaac --- /dev/null +++ b/deps/libretro-common @@ -0,0 +1 @@ +Subproject commit 0abedaac6a795c093f2e1a22f3028fca9efdf3c9 diff --git a/frontend/libretro-rthreads.c b/frontend/libretro-rthreads.c index 96c861d3..72784d4d 100644 --- a/frontend/libretro-rthreads.c +++ b/frontend/libretro-rthreads.c @@ -7,3 +7,14 @@ #endif #include "../deps/libretro-common/rthreads/rthreads.c" + +// an "extension" +int sthread_set_name(sthread_t *thread, const char *name) +{ +#if defined(__GLIBC__) || defined(__MACH__) || \ + (defined(__ANDROID_API__) && __ANDROID_API__ >= 26) + if (thread) + return pthread_setname_np(thread->id, name); +#endif + return -1; +} diff --git a/frontend/libretro-rthreads.h b/frontend/libretro-rthreads.h new file mode 100644 index 00000000..851d448e --- /dev/null +++ b/frontend/libretro-rthreads.h @@ -0,0 +1,3 @@ +#include "rthreads/rthreads.h" + +int sthread_set_name(sthread_t *thread, const char *name); diff --git a/frontend/libretro.c b/frontend/libretro.c index b5c3b92d..f7eb64cd 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -2290,7 +2290,7 @@ static void update_variables(bool in_flight) psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); } } -#endif /* !DRC_DISABLE */ +#endif // !DRC_DISABLE var.value = NULL; var.key = "pcsx_rearmed_psxclock"; @@ -2301,14 +2301,28 @@ static void update_variables(bool in_flight) } #if !defined(DRC_DISABLE) && !defined(LIGHTREC) +#ifdef NDRC_THREAD + var.value = NULL; + var.key = "pcsx_rearmed_drc_thread"; + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + ndrc_g.hacks &= ~(NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON); + if (strcmp(var.value, "disabled") == 0) + ndrc_g.hacks |= NDHACK_THREAD_FORCE; + else if (strcmp(var.value, "enabled") == 0) + ndrc_g.hacks |= NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON; + // psxCpu->ApplyConfig(); will start/stop the thread + } +#endif + var.value = NULL; var.key = "pcsx_rearmed_nosmccheck"; if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_NO_SMC_CHECK; + ndrc_g.hacks |= NDHACK_NO_SMC_CHECK; else - new_dynarec_hacks &= ~NDHACK_NO_SMC_CHECK; + ndrc_g.hacks &= ~NDHACK_NO_SMC_CHECK; } var.value = NULL; @@ -2316,9 +2330,9 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_GTE_UNNEEDED; + ndrc_g.hacks |= NDHACK_GTE_UNNEEDED; else - new_dynarec_hacks &= ~NDHACK_GTE_UNNEEDED; + ndrc_g.hacks &= ~NDHACK_GTE_UNNEEDED; } var.value = NULL; @@ -2326,9 +2340,9 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_GTE_NO_FLAGS; + ndrc_g.hacks |= NDHACK_GTE_NO_FLAGS; else - new_dynarec_hacks &= ~NDHACK_GTE_NO_FLAGS; + ndrc_g.hacks &= ~NDHACK_GTE_NO_FLAGS; } var.value = NULL; @@ -2336,9 +2350,9 @@ static void update_variables(bool in_flight) if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { if (strcmp(var.value, "enabled") == 0) - new_dynarec_hacks |= NDHACK_NO_COMPAT_HACKS; + ndrc_g.hacks |= NDHACK_NO_COMPAT_HACKS; else - new_dynarec_hacks &= ~NDHACK_NO_COMPAT_HACKS; + ndrc_g.hacks &= ~NDHACK_NO_COMPAT_HACKS; } #endif /* !DRC_DISABLE && !LIGHTREC */ diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 86fe7834..a4ead77e 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -224,7 +224,24 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "enabled", }, +#if !defined(LIGHTREC) && defined(NDRC_THREAD) + { + "pcsx_rearmed_drc_thread", + "DynaRec threading", + NULL, + "Run the dynarec on another thread.", + NULL, + "system", + { + { "auto", "Auto" }, + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL }, + }, + "auto", + }, #endif +#endif // DRC_DISABLE { "pcsx_rearmed_psxclock", "PSX CPU Clock Speed (%)", diff --git a/frontend/main.c b/frontend/main.c index 61dbf637..750e5661 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -159,7 +159,7 @@ void emu_set_default_config(void) spu_config.iTempo = 1; #endif #endif - new_dynarec_hacks = 0; + ndrc_g.hacks = 0; in_type[0] = PSE_PAD_TYPE_STANDARD; in_type[1] = PSE_PAD_TYPE_STANDARD; diff --git a/frontend/menu.c b/frontend/menu.c index 275028c5..49ffed9a 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -479,7 +479,7 @@ static const struct { CE_INTVAL(in_evdev_allow_abs_only), CE_INTVAL(volume_boost), CE_INTVAL(psx_clock), - CE_INTVAL(new_dynarec_hacks), + CE_INTVAL(ndrc_g.hacks), CE_INTVAL(in_enable_vibration), }; @@ -1630,10 +1630,10 @@ static const char h_cfg_stalls[] = "Will cause some games to run too fast"; static menu_entry e_menu_speed_hacks[] = { #ifndef DRC_DISABLE - mee_onoff_h ("Disable compat hacks", 0, new_dynarec_hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch), - mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), - mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), - mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), + mee_onoff_h ("Disable compat hacks", 0, ndrc_g.hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch), + mee_onoff_h ("Disable SMC checks", 0, ndrc_g.hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc), + mee_onoff_h ("Assume GTE regs unneeded", 0, ndrc_g.hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn), + mee_onoff_h ("Disable GTE flags", 0, ndrc_g.hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs), #endif mee_onoff_h ("Disable CPU/GTE stalls", 0, menu_iopts[0], 1, h_cfg_stalls), mee_end, @@ -2243,7 +2243,7 @@ static int romsel_run(void) printf("selected file: %s\n", fname); - new_dynarec_clear_full(); + ndrc_clear_full(); if (run_cd_image(fname) != 0) return -1; diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index c8a6fed4..1b63f241 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -764,14 +764,14 @@ void pl_frame_limit(void) // recompilation is not that fast and may cause frame skip on // loading screens and such, resulting in flicker or glitches - if (new_dynarec_did_compile) { + if (ndrc_g.did_compile) { if (drc_active_vsyncs < 32) pl_rearmed_cbs.fskip_advice = 0; drc_active_vsyncs++; } else drc_active_vsyncs = 0; - new_dynarec_did_compile = 0; + ndrc_g.did_compile = 0; } pcnt_start(PCNT_ALL); diff --git a/include/compiler_features.h b/include/compiler_features.h index 753706d7..0ab8468b 100644 --- a/include/compiler_features.h +++ b/include/compiler_features.h @@ -7,12 +7,12 @@ # else # define noinline __attribute__((noinline,noclone)) # endif -# define unused __attribute__((unused)) +# define attr_unused __attribute__((unused)) #else # define likely(x) (x) # define unlikely(x) (x) # define noinline -# define unused +# define attr_unused #endif #ifndef __has_builtin diff --git a/libpcsxcore/cdrom-async.c b/libpcsxcore/cdrom-async.c index 026a3451..2cb30473 100644 --- a/libpcsxcore/cdrom-async.c +++ b/libpcsxcore/cdrom-async.c @@ -114,7 +114,7 @@ static int cdrom_is_media_inserted(void *stream) { return 0; } #ifdef USE_ASYNC_CDROM -#include "rthreads/rthreads.h" +#include "../frontend/libretro-rthreads.h" #include "retro_timers.h" struct cached_buf { @@ -273,6 +273,7 @@ static void cdra_start_thread(void) acdrom.buf_cache[i].lba = ~0; } if (acdrom.thread) { + sthread_set_name(acdrom.thread, "pcsxr-cdrom"); SysPrintf("cdrom precache: %d buffers%s\n", acdrom.buf_cnt, acdrom.have_subchannel ? " +sub" : ""); } diff --git a/libpcsxcore/database.c b/libpcsxcore/database.c index c05b80cd..054e2a66 100644 --- a/libpcsxcore/database.c +++ b/libpcsxcore/database.c @@ -208,7 +208,7 @@ void Apply_Hacks_Cdrom(void) } /* Dynarec game-specific hacks */ - new_dynarec_hacks_pergame = 0; + ndrc_g.hacks_pergame = 0; Config.cycle_multiplier_override = 0; for (i = 0; i < ARRAY_SIZE(cycle_multiplier_overrides); i++) @@ -220,7 +220,7 @@ void Apply_Hacks_Cdrom(void) if (j < ARRAY_SIZE(cycle_multiplier_overrides[i].id)) { Config.cycle_multiplier_override = cycle_multiplier_overrides[i].mult; - new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; + ndrc_g.hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M; SysPrintf("using cycle_multiplier_override: %d\n", Config.cycle_multiplier_override); break; diff --git a/libpcsxcore/misc.c b/libpcsxcore/misc.c index 39665872..28651025 100644 --- a/libpcsxcore/misc.c +++ b/libpcsxcore/misc.c @@ -735,7 +735,7 @@ int SaveState(const char *file) { psxHwFreeze(f, 1); psxRcntFreeze(f, 1); mdecFreeze(f, 1); - new_dyna_freeze(f, 1); + ndrc_freeze(f, 1); padFreeze(f, 1); result = 0; @@ -819,7 +819,7 @@ int LoadState(const char *file) { psxHwFreeze(f, 0); psxRcntFreeze(f, 0); mdecFreeze(f, 0); - new_dyna_freeze(f, 0); + ndrc_freeze(f, 0); padFreeze(f, 0); events_restore(); diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 308f4a00..5b1d6fdb 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -242,7 +242,7 @@ static void alloc_cc_optional(struct regstat *cur, int i) /* Assembler */ -static unused char regname[16][4] = { +static attr_unused char regname[16][4] = { "r0", "r1", "r2", @@ -318,7 +318,7 @@ static u_int genjmp(u_int addr) return ((u_int)offset>>2)&0xffffff; } -static unused void emit_breakpoint(void) +static attr_unused void emit_breakpoint(void) { assem_debug("bkpt #0\n"); //output_w32(0xe1200070); @@ -730,7 +730,7 @@ static void emit_lsls_imm(int rs,int imm,int rt) output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } -static unused void emit_lslpls_imm(int rs,int imm,int rt) +static attr_unused void emit_lslpls_imm(int rs,int imm,int rt) { assert(imm>0); assert(imm<32); @@ -812,7 +812,7 @@ static void emit_sar(u_int rs,u_int shift,u_int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8)); } -static unused void emit_orrshl(u_int rs,u_int shift,u_int rt) +static attr_unused void emit_orrshl(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -821,7 +821,7 @@ static unused void emit_orrshl(u_int rs,u_int shift,u_int rt) output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8)); } -static unused void emit_orrshr(u_int rs,u_int shift,u_int rt) +static attr_unused void emit_orrshr(u_int rs,u_int shift,u_int rt) { assert(rs<16); assert(rt<16); @@ -892,7 +892,7 @@ static void emit_cmovs_imm(int imm,int rt) output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval); } -static unused void emit_cmovne_reg(int rs,int rt) +static attr_unused void emit_cmovne_reg(int rs,int rt) { assem_debug("movne %s,%s\n",regname[rt],regname[rs]); output_w32(0x11a00000|rd_rn_rm(rt,0,rs)); @@ -1089,7 +1089,7 @@ static void *emit_cbz(int rs, const void *a) return ret; } -static unused void emit_callreg(u_int r) +static attr_unused void emit_callreg(u_int r) { assert(r<15); assem_debug("blx %s\n",regname[r]); @@ -1404,7 +1404,7 @@ static void emit_teq(int rs, int rt) output_w32(0xe1300000|rd_rn_rm(0,rs,rt)); } -static unused void emit_rsbimm(int rs, int imm, int rt) +static attr_unused void emit_rsbimm(int rs, int imm, int rt) { u_int armval; genimm_checked(imm,&armval); @@ -1462,7 +1462,7 @@ static void emit_callne(int a) } // Used to preload hash table entries -static unused void emit_prefetchreg(int r) +static attr_unused void emit_prefetchreg(int r) { assem_debug("pld %s\n",regname[r]); output_w32(0xf5d0f000|rd_rn_rm(0,r,0)); @@ -1484,7 +1484,7 @@ static void emit_orrne_imm(int rs,int imm,int rt) output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval); } -static unused void emit_addpl_imm(int rs,int imm,int rt) +static attr_unused void emit_addpl_imm(int rs,int imm,int rt) { u_int armval; genimm_checked(imm,&armval); diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index bad2854c..259c8e88 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -119,14 +119,14 @@ static void alloc_cc_optional(struct regstat *cur, int i) /* Assembler */ -static unused const char *regname[32] = { +static attr_unused const char *regname[32] = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23", "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp" }; -static unused const char *regname64[32] = { +static attr_unused const char *regname64[32] = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23", @@ -138,7 +138,7 @@ enum { COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV }; -static unused const char *condname[16] = { +static attr_unused const char *condname[16] = { "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv" }; @@ -356,7 +356,7 @@ static void emit_subs(u_int rs1, u_int rs2, u_int rt) output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } -static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +static attr_unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) { assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); @@ -618,7 +618,7 @@ static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt) static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt) { - unused const char *st = s ? "s" : ""; + attr_unused const char *st = s ? "s" : ""; s = s ? 0x20000000 : 0; is64 = is64 ? 0x80000000 : 0; if (imm < 4096) { @@ -1293,8 +1293,8 @@ static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt) static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) { u_int op = 0xb9000000; - unused const char *ldst = is_st ? "st" : "ld"; - unused char rp = is64 ? 'x' : 'w'; + attr_unused const char *ldst = is_st ? "st" : "ld"; + attr_unused char rp = is64 ? 'x' : 'w'; assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs); is64 = is64 ? 1 : 0; assert((ofs & ((1 << (2+is64)) - 1)) == 0); @@ -1307,8 +1307,8 @@ static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs) static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs) { u_int op = 0x29000000; - unused const char *ldst = is_st ? "st" : "ld"; - unused char rp = is64 ? 'x' : 'w'; + attr_unused const char *ldst = is_st ? "st" : "ld"; + attr_unused char rp = is64 ? 'x' : 'w'; assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs); is64 = is64 ? 1 : 0; assert((ofs & ((1 << (2+is64)) - 1)) == 0); @@ -2082,7 +2082,7 @@ static void do_miniht_insert(u_int return_address,u_int rt,int temp) { emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); } -static unused void clear_cache_arm64(char *start, char *end) +static attr_unused void clear_cache_arm64(char *start, char *end) { // Don't rely on GCC's __clear_cache implementation, as it caches // icache/dcache cache line sizes, that can vary between cores on diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 6c1b48c5..e4958018 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -16,14 +16,37 @@ #include "../r3000a.h" #include "../gte_arm.h" #include "../gte_neon.h" +#include "compiler_features.h" #define FLAGLESS #include "../gte.h" +#ifdef NDRC_THREAD +#include "../../frontend/libretro-rthreads.h" +#include "features/features_cpu.h" +#include "retro_timers.h" +#endif +#ifdef _3DS +#include <3ds_utils.h> +#endif +#ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif //#define evprintf printf #define evprintf(...) +#if !defined(DRC_DISABLE) && !defined(LIGHTREC) +// reduce global loads/literal pools (maybe) +#include "linkage_offsets.h" +#define dynarec_local_var4(x) dynarec_local[(x) / sizeof(dynarec_local[0])] +#define stop dynarec_local_var4(LO_stop) +#define psxRegs (*(psxRegisters *)((char *)dynarec_local + LO_psxRegs)) +#define next_interupt dynarec_local_var4(LO_next_interupt) +#define pending_exception dynarec_local_var4(LO_pending_exception) +#endif + +static void ari64_thread_sync(void); + void pcsx_mtc0(u32 reg, u32 val) { evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle); @@ -41,7 +64,7 @@ void pcsx_mtc0_ds(u32 reg, u32 val) MTC0(&psxRegs, reg, val); } -void new_dyna_freeze(void *f, int mode) +void ndrc_freeze(void *f, int mode) { const char header_save[8] = "ariblks"; uint32_t addrs[1024 * 4]; @@ -49,6 +72,8 @@ void new_dyna_freeze(void *f, int mode) int bytes; char header[8]; + ari64_thread_sync(); + if (mode != 0) { // save size = new_dynarec_save_blocks(addrs, sizeof(addrs)); if (size == 0) @@ -86,8 +111,17 @@ void new_dyna_freeze(void *f, int mode) //printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded"); } +void ndrc_clear_full(void) +{ + ari64_thread_sync(); + new_dynarec_clear_full(); +} + #if !defined(DRC_DISABLE) && !defined(LIGHTREC) +static void ari64_thread_init(void); +static int ari64_thread_check_range(unsigned int start, unsigned int end); + /* GTE stuff */ void *gte_handlers[64]; @@ -189,43 +223,9 @@ const uint64_t gte_reg_writes[64] = { [GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27), }; -static int ari64_init() -{ - static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); - size_t i; - - new_dynarec_init(); - new_dyna_pcsx_mem_init(); - - for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) - if (psxCP2[i] != gteNULL) - gte_handlers[i] = psxCP2[i]; - -#if defined(__arm__) && !defined(DRC_DBG) - gte_handlers[0x06] = gteNCLIP_arm; -#ifdef HAVE_ARMV5 - gte_handlers_nf[0x01] = gteRTPS_nf_arm; - gte_handlers_nf[0x30] = gteRTPT_nf_arm; -#endif -#ifdef __ARM_NEON__ - // compiler's _nf version is still a lot slower than neon - // _nf_arm RTPS is roughly the same, RTPT slower - gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; - gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; -#endif -#endif -#ifdef DRC_DBG - memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); -#endif - psxH_ptr = psxH; - zeromem_ptr = zero_mem; - scratch_buf_ptr = scratch_buf; - - return 0; -} - static void ari64_reset() { + ari64_thread_sync(); new_dyna_pcsx_mem_reset(); new_dynarec_invalidate_all_pages(); new_dyna_pcsx_mem_load_state(); @@ -268,11 +268,16 @@ static void ari64_execute_block(enum blockExecCaller caller) static void ari64_clear(u32 addr, u32 size) { - size *= 4; /* PCSX uses DMA units (words) */ + u32 end = addr + size * 4; /* PCSX uses DMA units (words) */ + + evprintf("ari64_clear %08x %04x\n", addr, size * 4); - evprintf("ari64_clear %08x %04x\n", addr, size); + if (!new_dynarec_quick_check_range(addr, end) && + !ari64_thread_check_range(addr, end)) + return; - new_dynarec_invalidate_range(addr, addr + size); + ari64_thread_sync(); + new_dynarec_invalidate_range(addr, end); } static void ari64_notify(enum R3000Anote note, void *data) { @@ -294,22 +299,263 @@ static void ari64_notify(enum R3000Anote note, void *data) { static void ari64_apply_config() { + int thread_changed; + + ari64_thread_sync(); intApplyConfig(); if (Config.DisableStalls) - new_dynarec_hacks |= NDHACK_NO_STALLS; + ndrc_g.hacks |= NDHACK_NO_STALLS; else - new_dynarec_hacks &= ~NDHACK_NO_STALLS; + ndrc_g.hacks &= ~NDHACK_NO_STALLS; - if (Config.cycle_multiplier != cycle_multiplier_old - || new_dynarec_hacks != new_dynarec_hacks_old) + thread_changed = (ndrc_g.hacks ^ ndrc_g.hacks_old) + & (NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON); + if (Config.cycle_multiplier != ndrc_g.cycle_multiplier_old + || ndrc_g.hacks != ndrc_g.hacks_old) { new_dynarec_clear_full(); } + if (thread_changed) + ari64_thread_init(); +} + +#ifdef NDRC_THREAD +static void clear_local_cache(void) +{ +#ifdef _3DS + if (ndrc_g.thread.cache_dirty) { + ndrc_g.thread.cache_dirty = 0; + ctr_clear_cache(); + } +#else + // hopefully nothing is needed, as tested on r-pi4 and switch +#endif +} + +static noinline void ari64_execute_threaded_slow(enum blockExecCaller block_caller) +{ + if (!ndrc_g.thread.busy) { + memcpy(ndrc_smrv_regs, psxRegs.GPR.r, sizeof(ndrc_smrv_regs)); + slock_lock(ndrc_g.thread.lock); + ndrc_g.thread.addr = psxRegs.pc; + ndrc_g.thread.busy = 1; + slock_unlock(ndrc_g.thread.lock); + scond_signal(ndrc_g.thread.cond); + } + + //ari64_notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); + do + { + psxInt.ExecuteBlock(block_caller); + } + while (!stop && ndrc_g.thread.busy && block_caller == EXEC_CALLER_OTHER); + + psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL); + //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL); +} + +static void ari64_execute_threaded_once(enum blockExecCaller block_caller) +{ + psxRegisters *regs = (void *)((char *)dynarec_local + LO_psxRegs); + void *target; + + if (likely(!ndrc_g.thread.busy)) { + ndrc_g.thread.addr = 0; + target = ndrc_get_addr_ht_param(regs->pc, ndrc_cm_no_compile); + if (target) { + clear_local_cache(); + new_dyna_start_at(dynarec_local, target); + return; + } + } + ari64_execute_threaded_slow(block_caller); +} + +static void ari64_execute_threaded() +{ + schedule_timeslice(); + while (!stop) + { + ari64_execute_threaded_once(EXEC_CALLER_OTHER); + + if ((s32)(psxRegs.cycle - next_interupt) >= 0) + schedule_timeslice(); + } +} + +static void ari64_execute_threaded_block(enum blockExecCaller caller) +{ + if (caller == EXEC_CALLER_BOOT) + stop++; + + next_interupt = psxRegs.cycle + 1; + ari64_execute_threaded_once(caller); + + if (caller == EXEC_CALLER_BOOT) + stop--; +} + +static void ari64_thread_sync(void) +{ + if (!ndrc_g.thread.lock || !ndrc_g.thread.busy) + return; + for (;;) { + slock_lock(ndrc_g.thread.lock); + slock_unlock(ndrc_g.thread.lock); + if (!ndrc_g.thread.busy) + break; + retro_sleep(0); + } +} + +static int ari64_thread_check_range(unsigned int start, unsigned int end) +{ + u32 addr = ndrc_g.thread.addr; + if (!addr) + return 0; + + addr &= 0x1fffffff; + start &= 0x1fffffff; + end &= 0x1fffffff; + if (addr >= end) + return 0; + if (addr + MAXBLOCK * 4 <= start) + return 0; + + //SysPrintf("%x hits %x-%x\n", addr, start, end); + return 1; +} + +static void ari64_compile_thread(void *unused) +{ + void *target; + u32 addr; + + slock_lock(ndrc_g.thread.lock); + while (!ndrc_g.thread.exit) + { + if (!ndrc_g.thread.busy) + scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock); + addr = ndrc_g.thread.addr; + if (!ndrc_g.thread.busy || !addr || ndrc_g.thread.exit) + continue; + + target = ndrc_get_addr_ht_param(addr, ndrc_cm_compile_in_thread); + //printf("c %08x -> %p\n", addr, target); + ndrc_g.thread.busy = 0; + } + slock_unlock(ndrc_g.thread.lock); + (void)target; +} + +static void ari64_thread_shutdown(void) +{ + psxRec.Execute = ari64_execute; + psxRec.ExecuteBlock = ari64_execute_block; + + if (ndrc_g.thread.lock) + slock_lock(ndrc_g.thread.lock); + ndrc_g.thread.exit = 1; + if (ndrc_g.thread.lock) + slock_unlock(ndrc_g.thread.lock); + if (ndrc_g.thread.cond) + scond_signal(ndrc_g.thread.cond); + if (ndrc_g.thread.handle) { + sthread_join(ndrc_g.thread.handle); + ndrc_g.thread.handle = NULL; + } + if (ndrc_g.thread.cond) { + scond_free(ndrc_g.thread.cond); + ndrc_g.thread.cond = NULL; + } + if (ndrc_g.thread.lock) { + slock_free(ndrc_g.thread.lock); + ndrc_g.thread.lock = NULL; + } + ndrc_g.thread.busy = ndrc_g.thread.addr = 0; +} + +static void ari64_thread_init(void) +{ + int enable; + + if (ndrc_g.hacks & NDHACK_THREAD_FORCE) + enable = ndrc_g.hacks & NDHACK_THREAD_FORCE_ON; + else { + u32 cpu_count = cpu_features_get_core_amount(); + enable = cpu_count > 1; + } + + if (!ndrc_g.thread.handle == !enable) + return; + + ari64_thread_shutdown(); + ndrc_g.thread.busy = ndrc_g.thread.addr = ndrc_g.thread.exit = 0; + + if (enable) { + ndrc_g.thread.lock = slock_new(); + ndrc_g.thread.cond = scond_new(); + } + if (ndrc_g.thread.lock && ndrc_g.thread.cond) + ndrc_g.thread.handle = sthread_create(ari64_compile_thread, NULL); + if (ndrc_g.thread.handle) { + psxRec.Execute = ari64_execute_threaded; + psxRec.ExecuteBlock = ari64_execute_threaded_block; + } + else { + // clean up potential incomplete init + ari64_thread_shutdown(); + } + SysPrintf("compiler thread %sabled\n", ndrc_g.thread.handle ? "en" : "dis"); +} +#else // if !NDRC_THREAD +static void ari64_thread_init(void) {} +static void ari64_thread_shutdown(void) {} +static int ari64_thread_check_range(unsigned int start, unsigned int end) { return 0; } +#endif + +static int ari64_init() +{ + static u32 scratch_buf[8*8*2] __attribute__((aligned(64))); + size_t i; + + new_dynarec_init(); + new_dyna_pcsx_mem_init(); + + for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) + if (psxCP2[i] != gteNULL) + gte_handlers[i] = psxCP2[i]; + +#if defined(__arm__) && !defined(DRC_DBG) + gte_handlers[0x06] = gteNCLIP_arm; +#ifdef HAVE_ARMV5 + gte_handlers_nf[0x01] = gteRTPS_nf_arm; + gte_handlers_nf[0x30] = gteRTPT_nf_arm; +#endif +#ifdef __ARM_NEON__ + // compiler's _nf version is still a lot slower than neon + // _nf_arm RTPS is roughly the same, RTPT slower + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; +#endif +#endif +#ifdef DRC_DBG + memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf)); +#endif + psxH_ptr = psxH; + zeromem_ptr = zero_mem; + scratch_buf_ptr = scratch_buf; + + ari64_thread_init(); + + return 0; } static void ari64_shutdown() { + ari64_thread_shutdown(); new_dynarec_cleanup(); new_dyna_pcsx_mem_shutdown(); } @@ -327,14 +573,10 @@ R3000Acpu psxRec = { #else // if DRC_DISABLE +struct ndrc_globals ndrc_g; // dummy unsigned int address; int pending_exception, stop; u32 next_interupt; -int new_dynarec_did_compile; -int cycle_multiplier_old; -int new_dynarec_hacks_pergame; -int new_dynarec_hacks_old; -int new_dynarec_hacks; void *psxH_ptr; void *zeromem_ptr; u32 zero_mem[0x1000/4]; @@ -353,6 +595,11 @@ void new_dyna_pcsx_mem_isolate(int enable) {} void new_dyna_pcsx_mem_shutdown(void) {} int new_dynarec_save_blocks(void *save, int size) { return 0; } void new_dynarec_load_blocks(const void *save, int size) {} + +#endif // DRC_DISABLE + +#ifndef NDRC_THREAD +static void ari64_thread_sync(void) {} #endif #ifdef DRC_DBG @@ -624,4 +871,4 @@ ok: badregs_mask_prev = badregs_mask; } -#endif +#endif // DRC_DBG diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index ec307fc4..1b587661 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -3,12 +3,6 @@ extern int dynarec_local[]; -/* same as psxRegs.GPR.n.* */ -extern int hi, lo; - -/* same as psxRegs.CP0.n.* */ -extern int reg_cop0[]; - /* COP2/GTE */ enum gte_opcodes { GTE_RTPS = 0x01, @@ -35,7 +29,6 @@ enum gte_opcodes { GTE_NCCT = 0x3f, }; -extern int reg_cop2d[], reg_cop2c[]; extern void *gte_handlers[64]; extern void *gte_handlers_nf[64]; extern const char *gte_regnames[64]; diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 2bcf6654..58e057b5 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -65,8 +65,8 @@ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ @DRC_VAR(reg, 128) -DRC_VAR(lo, 4) -DRC_VAR(hi, 4) +@DRC_VAR(lo, 4) +@DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) DRC_VAR(reg_cop2d, 128) DRC_VAR(reg_cop2c, 128) @@ -155,7 +155,7 @@ FUNCTION(dyna_linker): mov r5, r1 lsl r6, r6, #8 /* must not compile - that might expire the caller block */ - mov r1, #0 + mov r1, #0 /* ndrc_compile_mode */ bl ndrc_get_addr_ht_param movs r8, r0 @@ -404,12 +404,19 @@ invalidate_addr_call: .size invalidate_addr_call, .-invalidate_addr_call .align 2 -FUNCTION(new_dyna_start): +FUNCTION(new_dyna_start_at): /* ip is stored to conform EABI alignment */ + stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} + mov fp, r0 /* dynarec_local */ + mov r0, r1 + b new_dyna_start_at_e + +FUNCTION(new_dyna_start): stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr} mov fp, r0 /* dynarec_local */ ldr r0, [fp, #LO_pcaddr] bl ndrc_get_addr_ht +new_dyna_start_at_e: ldr r1, [fp, #LO_next_interupt] ldr r10, [fp, #LO_cycle] str r1, [fp, #LO_last_count] diff --git a/libpcsxcore/new_dynarec/linkage_arm64.S b/libpcsxcore/new_dynarec/linkage_arm64.S index fa8a4117..9e61ea1e 100644 --- a/libpcsxcore/new_dynarec/linkage_arm64.S +++ b/libpcsxcore/new_dynarec/linkage_arm64.S @@ -67,8 +67,8 @@ DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs) /* psxRegs */ #DRC_VAR(reg, 128) -DRC_VAR(lo, 4) -DRC_VAR(hi, 4) +#DRC_VAR(lo, 4) +#DRC_VAR(hi, 4) DRC_VAR(reg_cop0, 128) DRC_VAR(reg_cop2d, 128) DRC_VAR(reg_cop2c, 128) @@ -184,21 +184,28 @@ FUNCTION(jump_to_new_pc): /* stack must be aligned by 16, and include space for save_regs() use */ .align 2 +FUNCTION(new_dyna_start_at): + stp x29, x30, [sp, #-SSP_ALL]! + mov rFP, x0 + b new_dyna_start_at_e + FUNCTION(new_dyna_start): stp x29, x30, [sp, #-SSP_ALL]! - ldr w1, [x0, #LO_next_interupt] - ldr w2, [x0, #LO_cycle] + mov rFP, x0 + ldr w0, [rFP, #LO_pcaddr] + bl ndrc_get_addr_ht + mov x1, x0 +new_dyna_start_at_e: + ldr w3, [rFP, #LO_next_interupt] + ldr w2, [rFP, #LO_cycle] stp x19, x20, [sp, #16*1] stp x21, x22, [sp, #16*2] stp x23, x24, [sp, #16*3] stp x25, x26, [sp, #16*4] stp x27, x28, [sp, #16*5] - mov rFP, x0 - ldr w0, [rFP, #LO_pcaddr] - str w1, [rFP, #LO_last_count] - sub rCC, w2, w1 - bl ndrc_get_addr_ht - br x0 + str w3, [rFP, #LO_last_count] + sub rCC, w2, w3 + br x1 ESIZE(new_dyna_start, .-new_dyna_start) .align 2 diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index ed2f4c63..c2899e42 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -69,6 +69,20 @@ static Jit g_jit; //#define inv_debug printf #define inv_debug(...) +// from linkage_* +extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG) +extern int last_count; // last absolute target, often = next_interupt +extern int pcaddr; +extern int pending_exception; +extern int branch_target; + +/* same as psxRegs.CP0.n.* */ +extern int reg_cop0[]; +extern int reg_cop2d[], reg_cop2c[]; + +extern uintptr_t ram_offset; +extern uintptr_t mini_ht[32][2]; + #ifdef __i386__ #include "assem_x86.h" #endif @@ -83,7 +97,6 @@ static Jit g_jit; #endif #define RAM_SIZE 0x200000 -#define MAXBLOCK 2048 #define MAX_OUTPUT_BLOCK_SIZE 262144 #define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2) #define PAGE_COUNT 1024 @@ -100,6 +113,8 @@ static Jit g_jit; #define TC_REDUCE_BYTES 0 #endif +struct ndrc_globals ndrc_g; + struct ndrc_tramp { struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; @@ -269,7 +284,7 @@ static struct compile_info static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs static uint64_t gte_rt[MAXBLOCK]; static uint64_t gte_unneeded[MAXBLOCK]; - static u_int smrv[32]; // speculated MIPS register values + unsigned int ndrc_smrv_regs[32]; // speculated MIPS register values static u_int smrv_strong; // mask or regs that are likely to have correct values static u_int smrv_weak; // same, but somewhat less likely static u_int smrv_strong_next; // same, but after current insn executes @@ -319,20 +334,7 @@ static struct compile_info #define stat_clear(s) #endif - int new_dynarec_hacks; - int new_dynarec_hacks_pergame; - int new_dynarec_hacks_old; - int new_dynarec_did_compile; - - #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x)) - - extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG) - extern int last_count; // last absolute target, often = next_interupt - extern int pcaddr; - extern int pending_exception; - extern int branch_target; - extern uintptr_t ram_offset; - extern uintptr_t mini_ht[32][2]; + #define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x)) /* registers that may be allocated */ /* 1-31 gpr */ @@ -403,7 +405,6 @@ void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); -void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile); void *ndrc_get_addr_ht(u_int vaddr); void ndrc_add_jump_out(u_int vaddr, void *src); void ndrc_write_invalidate_one(u_int addr); @@ -494,6 +495,7 @@ static void end_tcache_write(void *start, void *end) sceKernelSyncVMDomain(sceBlock, start, len); #elif defined(_3DS) ctr_flush_invalidate_cache(); + ndrc_g.thread.cache_dirty = 1; #elif defined(HAVE_LIBNX) if (g_jit.type == JitType_CodeMemory) { armDCacheClean(start, len); @@ -502,8 +504,8 @@ static void end_tcache_write(void *start, void *end) __asm__ volatile("isb" ::: "memory"); } #elif defined(__aarch64__) - // as of 2021, __clear_cache() is still broken on arm64 - // so here is a custom one :( + // __clear_cache() doesn't handle differing cacheline sizes on big.LITTLE and + // leaves it to the kernel to virtualize ctr_el0, which some old kernels don't do clear_cache_arm64(start, end); #else __clear_cache(start, end); @@ -597,7 +599,6 @@ static void do_clear_cache(void) #define NO_CYCLE_PENALTY_THR 12 -int cycle_multiplier_old; static int cycle_multiplier_active; static int CLOCK_ADJUST(int x) @@ -726,7 +727,7 @@ static int doesnt_expire_soon(u_char *tcaddr) return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; } -static unused void check_for_block_changes(u_int start, u_int end) +static attr_unused void check_for_block_changes(u_int start, u_int end) { u_int start_page = get_page_prev(start); u_int end_page = get_page(end - 1); @@ -805,7 +806,7 @@ static noinline u_int generate_exception(u_int pc) // Get address from virtual address // This is called from the recompiled JR/JALR instructions -static void noinline *get_addr(u_int vaddr, int can_compile) +static void noinline *get_addr(const u_int vaddr, enum ndrc_compile_mode compile_mode) { u_int start_page = get_page_prev(vaddr); u_int i, page, end_page = get_page(vaddr); @@ -833,18 +834,29 @@ static void noinline *get_addr(u_int vaddr, int can_compile) if (found_clean) return found_clean; - if (!can_compile) + if (compile_mode == ndrc_cm_no_compile) return NULL; +#ifdef NDRC_THREAD + if (ndrc_g.thread.handle && compile_mode == ndrc_cm_compile_live) { + psxRegs.pc = vaddr; + return new_dyna_leave; + } + if (!ndrc_g.thread.handle) +#endif + memcpy(ndrc_smrv_regs, psxRegs.GPR.r, sizeof(ndrc_smrv_regs)); int r = new_recompile_block(vaddr); if (likely(r == 0)) return ndrc_get_addr_ht(vaddr); - return ndrc_get_addr_ht(generate_exception(vaddr)); + if (compile_mode == ndrc_cm_compile_live) + return ndrc_get_addr_ht(generate_exception(vaddr)); + + return NULL; } // Look up address in hash table first -void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) +void *ndrc_get_addr_ht_param(unsigned int vaddr, enum ndrc_compile_mode compile_mode) { //check_for_block_changes(vaddr, vaddr + MAXBLOCK); const struct ht_entry *ht_bin = hash_table_get(vaddr); @@ -852,12 +864,14 @@ void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) stat_inc(stat_ht_lookups); if (ht_bin->vaddr[0] == vaddr_a) return ht_bin->tcaddr[0]; if (ht_bin->vaddr[1] == vaddr_a) return ht_bin->tcaddr[1]; - return get_addr(vaddr, can_compile); + return get_addr(vaddr, compile_mode); } +// "usual" addr lookup for indirect branches, etc +// to be used by currently running code only void *ndrc_get_addr_ht(u_int vaddr) { - return ndrc_get_addr_ht_param(vaddr, 1); + return ndrc_get_addr_ht_param(vaddr, ndrc_cm_compile_live); } static void clear_all_regs(signed char regmap[]) @@ -1239,6 +1253,7 @@ static const struct { FUNCNAME(cc_interrupt), FUNCNAME(gen_interupt), FUNCNAME(ndrc_get_addr_ht), + FUNCNAME(ndrc_get_addr_ht_param), FUNCNAME(jump_handler_read8), FUNCNAME(jump_handler_read16), FUNCNAME(jump_handler_read32), @@ -1615,6 +1630,24 @@ void new_dynarec_invalidate_range(unsigned int start, unsigned int end) invalidate_range(start, end, NULL, NULL); } +// check if the range may need invalidation (must be thread-safe) +int new_dynarec_quick_check_range(unsigned int start, unsigned int end) +{ + u_int start_page = get_page_prev(start); + u_int end_page = get_page(end - 1); + u_int page; + + if (inv_code_start <= start && end <= inv_code_end) + return 0; + for (page = start_page; page <= end_page; page++) { + if (blocks[page]) { + //SysPrintf("quick hit %x-%x\n", start, end); + return 1; + } + } + return 0; +} + static void ndrc_write_invalidate_many(u_int start, u_int end) { // this check is done by the caller @@ -2845,8 +2878,8 @@ static void *emit_fastpath_cmp_jump(int i, const struct regstat *i_regs, assert(addr >= 0); *offset_reg = -1; if(((smrv_strong|smrv_weak)>>mr)&1) { - type=get_ptr_mem_type(smrv[mr]); - //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); + type=get_ptr_mem_type(ndrc_smrv_regs[mr]); + //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], start+i*4, mr, type); } else { // use the mirror we are running on @@ -4209,28 +4242,27 @@ static void intcall_assemble(int i, const struct regstat *i_regs, int ccadj_) static void speculate_mov(int rs,int rt) { - if(rt!=0) { - smrv_strong_next|=1<=0) { if(get_final_value(hr,i,&value)) - smrv[dops[i].rt1]=value; - else smrv[dops[i].rt1]=constmap[i][hr]; + ndrc_smrv_regs[dops[i].rt1]=value; + else ndrc_smrv_regs[dops[i].rt1]=constmap[i][hr]; smrv_strong_next|=1<>24)==0xa0)) { + if(start<0x2000&&(dops[i].rt1==26||(ndrc_smrv_regs[dops[i].rt1]>>24)==0xa0)) { // special case for BIOS - smrv[dops[i].rt1]=0xa0000000; + ndrc_smrv_regs[dops[i].rt1]=0xa0000000; smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); #endif } @@ -6251,14 +6283,14 @@ void new_dynarec_clear_full(void) stat_clear(stat_blocks); stat_clear(stat_links); - if (cycle_multiplier_old != Config.cycle_multiplier - || new_dynarec_hacks_old != new_dynarec_hacks) + if (ndrc_g.cycle_multiplier_old != Config.cycle_multiplier + || ndrc_g.hacks_old != ndrc_g.hacks) { SysPrintf("ndrc config: mul=%d, ha=%x, pex=%d\n", - get_cycle_multiplier(), new_dynarec_hacks, Config.PreciseExceptions); + get_cycle_multiplier(), ndrc_g.hacks, Config.PreciseExceptions); } - cycle_multiplier_old = Config.cycle_multiplier; - new_dynarec_hacks_old = new_dynarec_hacks; + ndrc_g.cycle_multiplier_old = Config.cycle_multiplier; + ndrc_g.hacks_old = ndrc_g.hacks; } static int pgsize(void) @@ -6516,7 +6548,7 @@ void new_dynarec_load_blocks(const void *save, int size) psxRegs.GPR.r[i] = 0x1f800000; } - ndrc_get_addr_ht(sblocks[b].addr); + ndrc_get_addr_ht_param(sblocks[b].addr, ndrc_cm_compile_offline); for (f = sblocks[b].regflags, i = 0; f; f >>= 1, i++) { if (f & 1) @@ -8368,7 +8400,6 @@ static noinline void pass5a_preallocate1(void) static noinline void pass5b_preallocate2(void) { int i, hr, limit = min(slen - 1, MAXBLOCK - 2); - assert(slen < MAXBLOCK - 1); for (i = 0; i < limit; i++) { if (!i || !dops[i-1].is_jump) @@ -8987,7 +9018,7 @@ static int new_recompile_block(u_int addr) } start = addr; - new_dynarec_did_compile=1; + ndrc_g.did_compile = 1; if (Config.HLE && start == 0x80001000) // hlecall { void *beginning = start_block(); diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index d18ff630..8c168084 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -1,11 +1,9 @@ #define NEW_DYNAREC 1 -extern int pcaddr; +#define MAXBLOCK 2048 // in mips instructions + extern int pending_exception; extern int stop; -extern int new_dynarec_did_compile; - -extern int cycle_multiplier_old; #define NDHACK_NO_SMC_CHECK (1<<0) #define NDHACK_GTE_UNNEEDED (1<<1) @@ -13,17 +11,48 @@ extern int cycle_multiplier_old; #define NDHACK_OVERRIDE_CYCLE_M (1<<3) #define NDHACK_NO_STALLS (1<<4) #define NDHACK_NO_COMPAT_HACKS (1<<5) -extern int new_dynarec_hacks; -extern int new_dynarec_hacks_pergame; -extern int new_dynarec_hacks_old; +#define NDHACK_THREAD_FORCE (1<<6) +#define NDHACK_THREAD_FORCE_ON (1<<7) + +struct ndrc_globals +{ + int hacks; + int hacks_pergame; + int hacks_old; + int did_compile; + int cycle_multiplier_old; + struct { + void *handle; + void *lock; + void *cond; + unsigned int addr; + int busy; + int exit; + int cache_dirty; // 3ds only + } thread; +}; +extern struct ndrc_globals ndrc_g; void new_dynarec_init(void); void new_dynarec_cleanup(void); void new_dynarec_clear_full(void); -void new_dyna_start(void *context); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); void new_dynarec_print_stats(void); +int new_dynarec_quick_check_range(unsigned int start, unsigned int end); void new_dynarec_invalidate_range(unsigned int start, unsigned int end); void new_dynarec_invalidate_all_pages(void); + +void new_dyna_start(void *context); +void new_dyna_start_at(void *context, void *compiled_code); + +enum ndrc_compile_mode { + ndrc_cm_no_compile = 0, + ndrc_cm_compile_live, // from executing code, vaddr is the current pc + ndrc_cm_compile_offline, + ndrc_cm_compile_in_thread, +}; +void *ndrc_get_addr_ht_param(unsigned int vaddr, enum ndrc_compile_mode compile_mode); + +extern unsigned int ndrc_smrv_regs[32]; diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c index 30608535..fadbf050 100644 --- a/libpcsxcore/psxinterpreter.c +++ b/libpcsxcore/psxinterpreter.c @@ -1228,6 +1228,15 @@ void intExecuteBlock(enum blockExecCaller caller) { execI_(memRLUT, regs_); } +static void intExecuteBlockBp(enum blockExecCaller caller) { + psxRegisters *regs_ = &psxRegs; + u8 **memRLUT = psxMemRLUT; + + branchSeen = 0; + while (!branchSeen) + execIbp(memRLUT, regs_); +} + static void intClear(u32 Addr, u32 Size) { } @@ -1316,6 +1325,7 @@ void intApplyConfig() { psxSPC[0x08] = psxJRe; psxSPC[0x09] = psxJALRe; psxInt.Execute = intExecuteBp; + psxInt.ExecuteBlock = intExecuteBlockBp; } else { psxBSC[0x20] = psxLB; psxBSC[0x21] = psxLH; @@ -1333,6 +1343,7 @@ void intApplyConfig() { psxSPC[0x08] = psxJR; psxSPC[0x09] = psxJALR; psxInt.Execute = intExecute; + psxInt.ExecuteBlock = intExecuteBlock; } // the dynarec may occasionally call the interpreter, in such a case the diff --git a/libpcsxcore/r3000a.h b/libpcsxcore/r3000a.h index 03aeee19..93a53ced 100644 --- a/libpcsxcore/r3000a.h +++ b/libpcsxcore/r3000a.h @@ -49,6 +49,7 @@ enum R3000Anote { enum blockExecCaller { EXEC_CALLER_BOOT, EXEC_CALLER_HLE, + EXEC_CALLER_OTHER, }; typedef struct { @@ -213,7 +214,8 @@ typedef struct { extern psxRegisters psxRegs; /* new_dynarec stuff */ -void new_dyna_freeze(void *f, int mode); +void ndrc_freeze(void *f, int mode); +void ndrc_clear_full(void); int psxInit(); void psxReset(); -- 2.39.5