From ba3814c189d3bd2332b66fb6c633a7d028e618fe Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 11 Sep 2022 11:12:45 +0200 Subject: [PATCH] Update lightrec 20220910 (#686) * git subrepo pull --force deps/lightning subrepo: subdir: "deps/lightning" merged: "b1dfc564e2" upstream: origin: "https://github.com/pcercuei/gnu_lightning.git" branch: "pcsx_rearmed" commit: "b1dfc564e2" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * include: update lightning.h Update lightning.h with a copy generated from the latest master. Signed-off-by: Paul Cercueil * git subrepo pull --force deps/lightrec subrepo: subdir: "deps/lightrec" merged: "e122276183" upstream: origin: "https://github.com/pcercuei/lightrec.git" branch: "master" commit: "e122276183" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo.git" commit: "2f68596" * lightrec: Update to latest Lightrec API Remove the debug features since they aren't really useful in the libretro core. Update the glue code to use the updated API functions; and implement lightrec_can_hw_direct() for a slight performance increase. 
Signed-off-by: Paul Cercueil Signed-off-by: Paul Cercueil --- deps/lightning/.gitrepo | 4 +- deps/lightning/ChangeLog | 40 ++ deps/lightning/check/Makefile.am | 8 +- deps/lightning/check/catomic.c | 144 ++++++ deps/lightning/check/catomic.ok | 5 + deps/lightning/check/lightning.c | 13 + deps/lightning/configure.ac | 12 + deps/lightning/doc/body.texi | 66 +++ deps/lightning/include/lightning.h.in | 9 + .../lightning/include/lightning/jit_private.h | 1 + deps/lightning/lib/jit_aarch64-cpu.c | 35 ++ deps/lightning/lib/jit_aarch64-sz.c | 2 + deps/lightning/lib/jit_aarch64.c | 8 + deps/lightning/lib/jit_alpha-cpu.c | 10 + deps/lightning/lib/jit_alpha-sz.c | 2 + deps/lightning/lib/jit_alpha.c | 10 + deps/lightning/lib/jit_arm-cpu.c | 99 +++++ deps/lightning/lib/jit_arm-sz.c | 4 + deps/lightning/lib/jit_arm.c | 10 + deps/lightning/lib/jit_disasm.c | 18 + deps/lightning/lib/jit_fallback.c | 177 ++++++++ deps/lightning/lib/jit_hppa-cpu.c | 12 + deps/lightning/lib/jit_hppa-sz.c | 2 + deps/lightning/lib/jit_hppa.c | 10 + deps/lightning/lib/jit_ia64-cpu.c | 12 + deps/lightning/lib/jit_ia64-sz.c | 2 + deps/lightning/lib/jit_ia64.c | 10 + deps/lightning/lib/jit_mips-cpu.c | 38 ++ deps/lightning/lib/jit_mips-sz.c | 6 + deps/lightning/lib/jit_mips.c | 13 +- deps/lightning/lib/jit_names.c | 1 + deps/lightning/lib/jit_ppc-cpu.c | 135 +++++- deps/lightning/lib/jit_ppc-fpu.c | 70 ++- deps/lightning/lib/jit_ppc-sz.c | 8 + deps/lightning/lib/jit_ppc.c | 22 +- deps/lightning/lib/jit_print.c | 22 +- deps/lightning/lib/jit_riscv-cpu.c | 12 + deps/lightning/lib/jit_riscv-sz.c | 2 + deps/lightning/lib/jit_riscv.c | 10 + deps/lightning/lib/jit_s390-cpu.c | 12 + deps/lightning/lib/jit_s390-sz.c | 4 + deps/lightning/lib/jit_s390.c | 10 + deps/lightning/lib/jit_sparc-cpu.c | 12 + deps/lightning/lib/jit_sparc-sz.c | 4 + deps/lightning/lib/jit_sparc.c | 10 + deps/lightning/lib/jit_x86-cpu.c | 65 +++ deps/lightning/lib/jit_x86-sz.c | 8 + deps/lightning/lib/jit_x86.c | 8 + 
deps/lightning/lib/lightning.c | 417 ++++++++++++------ deps/lightrec/.gitrepo | 4 +- deps/lightrec/README.md | 4 +- deps/lightrec/blockcache.c | 53 ++- deps/lightrec/disassembler.c | 78 +++- deps/lightrec/disassembler.h | 31 ++ deps/lightrec/emitter.c | 251 ++++++++--- deps/lightrec/interpreter.c | 31 ++ deps/lightrec/lightrec-private.h | 65 ++- deps/lightrec/lightrec.c | 394 ++++++++++------- deps/lightrec/lightrec.h | 5 +- deps/lightrec/optimizer.c | 313 +++++++++---- deps/lightrec/reaper.c | 30 +- deps/lightrec/recompiler.c | 80 ++-- deps/lightrec/regcache.c | 173 +++++--- deps/lightrec/regcache.h | 25 +- include/lightning/lightning.h | 9 + libpcsxcore/lightrec/plugin.c | 186 ++++---- 66 files changed, 2618 insertions(+), 728 deletions(-) create mode 100644 deps/lightning/check/catomic.c create mode 100644 deps/lightning/check/catomic.ok create mode 100644 deps/lightning/lib/jit_fallback.c diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index 420b6e21..e1611ab6 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/gnu_lightning.git branch = pcsx_rearmed - commit = 7fce9abb2a6bfc3967b4e5705794e617ed909402 - parent = 94d482f4b7f5da2c5af7e3590b770261f907f185 + commit = b1dfc564e2327621d15e688911a398c3a729bd82 + parent = 7393802c34796806043533cd379e5bcbd66cfd54 method = merge cmdver = 0.4.3 diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog index a8420405..7fe5c7c5 100644 --- a/deps/lightning/ChangeLog +++ b/deps/lightning/ChangeLog @@ -1,3 +1,43 @@ +2022-09-08 Paulo Andrade + + * lib/jit_fallback.c: Implement fallback compare and swap with + pthreads. + * check/Makefile.am: Update for new cas{r,i} simple test. + * check/catomic.c, check/catomic.ok: New test case for + simple compare and swap atomic operation. + * check/lightning.c: Add entries to be able to use + the new compare and swap atomic operation. 
Still missing + a general test, only the basic C version. + * include/lightning.h.in: Include pthread.h, even if not + needing a fallback compare and swap. + * include/lightning/jit_private.h: Add support for a register pair + in second argument. Required by the new casr and casi operations. + * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c, + lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_x86-cpu.c, + lib/jit_x86-sz.c, lib/jit_x86.c: Implement inline code for compare + and swap. + * lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c: Implement + inline code for compare and swap if cpu is armv7, otherwise, use + a fallback with pthreads. + * lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c, + lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c, + lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c, + lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c, + lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c, + lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c, + lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c: Implement + fallback compare and swap with pthreads. At least some of these + should be updated for inline code generation. + * lib/jit_names.c, lib/jit_print.c, lib/lightning.c: Update for the + new compare and swap operation. + * doc/body.texi: Add simple documentation of the new compare and + swap operation. + +2022-08-12 Marc Nieper-Wißkirchen + + Document jit_align. + * doc/body.texi: Add documentation for jit_align. + 2022-05-14 Paulo Andrade * include/lightning.h.in: Reorder jit_mov{n,z}r in instruction list.
diff --git a/deps/lightning/check/Makefile.am b/deps/lightning/check/Makefile.am index fc9f232e..3cc54d10 100644 --- a/deps/lightning/check/Makefile.am +++ b/deps/lightning/check/Makefile.am @@ -16,7 +16,8 @@ AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE -check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list +check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \ + catomic lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) lightning_SOURCES = lightning.c @@ -42,6 +43,9 @@ carg_SOURCES = carg.c cva_list_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) cva_list_SOURCES = cva_list.c +catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB) +catomic_SOURCES = catomic.c + $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la @@ -319,7 +323,7 @@ $(nodata_TESTS): check.nodata.sh TESTS += $(nodata_TESTS) endif -TESTS += ccall self setcode nodata ctramp carg cva_list +TESTS += ccall self setcode nodata ctramp carg cva_list catomic CLEANFILES = $(TESTS) #TESTS_ENVIRONMENT=$(srcdir)/run-test; diff --git a/deps/lightning/check/catomic.c b/deps/lightning/check/catomic.c new file mode 100644 index 00000000..04a2f89d --- /dev/null +++ b/deps/lightning/check/catomic.c @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include + +void alarm_handler(int unused) +{ + _exit(1); +} + +int +main(int argc, char *argv[]) +{ + jit_state_t *_jit; + void (*code)(void); + jit_node_t *jmpi_main, *label; + jit_node_t *func0, *func1, *func2, *func3; + jit_node_t *patch0, *patch1, *patch2, *patch3; + jit_word_t lock; + pthread_t tids[4]; + + /* If there is any bug, do not hang in "make check" */ + signal(SIGALRM, alarm_handler); + alarm(5); + + init_jit(argv[0]); + _jit = jit_new_state(); + + jmpi_main = jit_jmpi(); + +#define defun(name, line) \ + jit_name(#name); \ + jit_note("catomic.c", line); \ + name = jit_label(); \ + 
jit_prolog(); \ + jit_movi(JIT_V0, (jit_word_t)&lock); \ + jit_movi(JIT_R1, 0); \ + jit_movi(JIT_R2, line); \ + /* spin until get the lock */ \ + label = jit_label(); \ + jit_casr(JIT_R0, JIT_V0, JIT_R1, JIT_R2); \ + jit_patch_at(jit_beqi(JIT_R0, 0), label); \ + /* lock acquired */ \ + jit_prepare(); \ + /* pretend to be doing something useful for 0.01 usec + * while holding the lock */ \ + jit_pushargi(10000); \ + jit_finishi(usleep); \ + /* release lock */ \ + jit_movi(JIT_R1, 0); \ + jit_str(JIT_V0, JIT_R1); \ + /* Now test casi */ \ + jit_movi(JIT_R1, 0); \ + jit_movi(JIT_R2, line); \ + /* spin until get the lock */ \ + label = jit_label(); \ + jit_casi(JIT_R0, (jit_word_t)&lock, JIT_R1, JIT_R2); \ + jit_patch_at(jit_beqi(JIT_R0, 0), label); \ + /* lock acquired */ \ + jit_prepare(); \ + /* pretend to be doing something useful for 0.01 usec + * while holding the lock */ \ + jit_pushargi(10000); \ + jit_finishi(usleep); \ + jit_prepare(); \ + /* for make check, just print "ok" */ \ + jit_pushargi((jit_word_t)"ok"); \ + /*jit_pushargi((jit_word_t)#name);*/ \ + jit_finishi(puts); \ + /* release lock */ \ + jit_movi(JIT_R1, 0); \ + jit_str(JIT_V0, JIT_R1); \ + jit_ret(); \ + jit_epilog(); + defun(func0, __LINE__); + defun(func1, __LINE__); + defun(func2, __LINE__); + defun(func3, __LINE__); + + jit_patch(jmpi_main); + jit_name("main"); + jit_note("catomic.c", __LINE__); + jit_prolog(); + +#define start(tid) \ + /* set JIT_R0 to thread function */ \ + jit_patch_at(jit_movi(JIT_R0, 0), func##tid); \ + jit_prepare(); \ + /* pthread_t first argument */ \ + jit_pushargi((jit_word_t)(tids + tid)); \ + /* pthread_attr_t second argument */ \ + jit_pushargi((jit_word_t)NULL); \ + /* start routine third argument */ \ + jit_pushargr(JIT_R0); \ + /* argument to start routine fourth argument */ \ + jit_pushargi((jit_word_t)NULL); \ + /* start thread */ \ + jit_finishi(pthread_create); + /* spawn four threads */ + start(0); + start(1); + start(2); + start(3); + +#define 
join(tid) \ + /* load pthread_t value in JIT_R0 */ \ + jit_movi(JIT_R0, (jit_word_t)tids); \ + jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \ + jit_prepare(); \ + jit_pushargr(JIT_R0); \ + jit_pushargi((jit_word_t)NULL); \ + jit_finishi(pthread_join); + /* wait for threads to finish */ + join(0); + join(1); + join(2); + join(3); + + jit_prepare(); + jit_pushargi((jit_word_t)"ok"); + jit_finishi(puts); + + jit_ret(); + jit_epilog(); + + code = jit_emit(); + +#if 1 + jit_disassemble(); +#endif + + jit_clear_state(); + + /* let first thread acquire the lock */ + lock = 0; + + (*code)(); + jit_destroy_state(); + + finish_jit(); + + return (0); +} diff --git a/deps/lightning/check/catomic.ok b/deps/lightning/check/catomic.ok new file mode 100644 index 00000000..b130552e --- /dev/null +++ b/deps/lightning/check/catomic.ok @@ -0,0 +1,5 @@ +ok +ok +ok +ok +ok diff --git a/deps/lightning/check/lightning.c b/deps/lightning/check/lightning.c index 3cf3e70d..34b5440e 100644 --- a/deps/lightning/check/lightning.c +++ b/deps/lightning/check/lightning.c @@ -316,6 +316,7 @@ static void ger_u(void); static void gei_u(void); static void gtr(void); static void gti(void); static void gtr_u(void); static void gti_u(void); static void ner(void); static void nei(void); +static void casr(void); static void casi(void); static void movr(void); static void movi(void); static void extr_c(void); static void extr_uc(void); static void extr_s(void); static void extr_us(void); @@ -636,6 +637,7 @@ static instr_t instr_vector[] = { entry(gtr), entry(gti), entry(gtr_u), entry(gti_u), entry(ner), entry(nei), + entry(casr), entry(casi), entry(movr), entry(movi), entry(extr_c), entry(extr_uc), entry(extr_s), entry(extr_us), @@ -1028,6 +1030,16 @@ name(void) \ jit_word_t im = get_imm(); \ jit_##name(r0, r1, r2, im); \ } +#define entry_ir_im_ir_ir(name) \ +static void \ +name(void) \ +{ \ + jit_gpr_t r0 = get_ireg(); \ + jit_word_t im = get_imm(); \ + jit_gpr_t r1 = get_ireg(), r2 = get_ireg(); \ + 
jit_##name(r0, im, r1, r2); \ +} + #define entry_ir_ir(name) \ static void \ name(void) \ @@ -1443,6 +1455,7 @@ entry_ir_ir_ir(ger_u) entry_ir_ir_im(gei_u) entry_ir_ir_ir(gtr) entry_ir_ir_im(gti) entry_ir_ir_ir(gtr_u) entry_ir_ir_im(gti_u) entry_ir_ir_ir(ner) entry_ir_ir_im(nei) +entry_ir_ir_ir_ir(casr) entry_ir_im_ir_ir(casi) entry_ir_ir(movr) static void movi(void) diff --git a/deps/lightning/configure.ac b/deps/lightning/configure.ac index 5b582d28..8200651c 100644 --- a/deps/lightning/configure.ac +++ b/deps/lightning/configure.ac @@ -136,10 +136,22 @@ if test "x$DISASSEMBLER" != "xno"; then return 0; } )], [ac_cv_test_new_disassembler=no],,) + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + #include + int main(int argc, char *argv[]) + { + struct disassemble_info dinfo; + INIT_DISASSEMBLE_INFO(dinfo, NULL, NULL, NULL); + return 0; + } + )], [ac_cv_test_new_disassemble_info=yes],[ac_cv_test_new_disassemble_info=no],) CFLAGS="$save_CFLAGS" if test "x$ac_cv_test_new_disassembler" != "xno"; then LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_29=1" fi + if test "x$ac_cv_test_new_disassemble_info" != "xno"; then + LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_38=1" + fi fi AC_ARG_ENABLE(devel-disassembler, diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi index c174fcfb..1d8d2777 100644 --- a/deps/lightning/doc/body.texi +++ b/deps/lightning/doc/body.texi @@ -597,6 +597,12 @@ forward (not specified) @r{forward label} indirect (not specified) @r{special simple label} @end example +The following instruction is used to specify a minimal alignment for +the next instruction, usually with a label: +@example +align (not specified) @r{align code} +@end example + @code{label} is normally used as @code{patch_at} argument for backward jumps. @@ -649,6 +655,38 @@ that automatically binds the implicit label added by @code{patch} with the @code{movi}, but on some special conditions it is required to create an "unbound" label. 
+@code{align} is useful for creating multiple entry points to a +(trampoline) function that are all accessible through a single +function pointer. @code{align} receives an integer argument that +defines the minimal alignment of the address of a label directly +following the @code{align} instruction. The integer argument must be +a power of two and the effective alignment will be a power of two no +less than the argument to @code{align}. If the argument to +@code{align} is 16 or more, the effective alignment will match the +specified minimal alignment exactly. + +@example + jit_node_t *forward, *label1, *label2, *jump; + unsigned char *addr1, *addr2; +forward = jit_forward(); + jit_align(16); +label1 = jit_indirect(); @rem{/* first entry point */} +jump = jit_jmpi(); @rem{/* jump to first handler */} + jit_patch_at(jump, forward); + jit_align(16); +label2 = jit_indirect(); @rem{/* second entry point */} + ... @rem{/* second handler */} + jit_jmpr(...); + jit_link(forward); + ... @rem{/* first handler */} + jit_jmpr(...); + ... + jit_emit(); + addr1 = jit_address(label1); + addr2 = jit_address(label2); + assert(addr2 - addr1 == 16); @rem{/* only one of the addresses needs to be remembered */} +@end example + @item Function prolog These macros are used to set up a function prolog. The @code{allocai} @@ -890,6 +928,34 @@ to save and load the values when making function calls. @code{pointer_p} expects a pointer argument, and will return non zero if the pointer is inside the generated jit code. Must be called after @code{jit_emit} and before @code{jit_destroy_state}. + +@item Atomic operations +Only compare-and-swap is implemented. It accepts four operands; +the second can be an immediate. + +The first argument is set to a boolean value telling whether the +operation succeeded. + +Arguments must be different registers; the result register cannot also +be used to pass an argument. + +The second argument is the address of a machine word. + +The third argument is the old value.
+ +The fourth argument is the new value. + +@example +casr O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0 +casi O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0 +@end example + +If the value at the address in the second argument is equal to the third +argument, the address value is atomically modified to the value of the +fourth argument and the first argument is set to a non zero value. + +If the value at the address in the second argument is not equal to the +third argument nothing is done and the first argument is set to zero. @end table @node GNU lightning examples diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in index 887a951a..6f8ee030 100644 --- a/deps/lightning/include/lightning.h.in +++ b/deps/lightning/include/lightning.h.in @@ -24,6 +24,7 @@ #include @MAYBE_INCLUDE_STDINT_H@ #include +#include #if defined(__hpux) && defined(__hppa__) # include @@ -913,6 +914,10 @@ typedef enum { #define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) #endif + jit_code_casr, jit_code_casi, +#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) +#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) + jit_code_last_code } jit_code_t; @@ -1081,6 +1086,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t, extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t, jit_int32_t, jit_int32_t, jit_word_t, jit_word_t); +#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h) +extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, + jit_int32_t, jit_int32_t); #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_float32_t); diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h index 0af24cbc..4925a864 100644 --- a/deps/lightning/include/lightning/jit_private.h +++
b/deps/lightning/include/lightning/jit_private.h @@ -276,6 +276,7 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_cc_a2_int 0x00100000 /* arg2 is immediate word */ #define jit_cc_a2_flt 0x00200000 /* arg2 is immediate float */ #define jit_cc_a2_dbl 0x00400000 /* arg2 is immediate double */ +#define jit_cc_a2_rlh 0x00800000 /* arg2 is a register pair */ #if __ia64__ || (__sparc__ && __WORDSIZE == 64) extern void diff --git a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c index 7d2a99d6..7572be7c 100644 --- a/deps/lightning/lib/jit_aarch64-cpu.c +++ b/deps/lightning/lib/jit_aarch64-cpu.c @@ -318,6 +318,8 @@ typedef union { # define A64_LDRSB 0x38e06800 # define A64_STR 0xf8206800 # define A64_LDR 0xf8606800 +# define A64_LDAXR 0xc85ffc00 +# define A64_STLXR 0xc800fc00 # define A64_STRH 0x78206800 # define A64_LDRH 0x78606800 # define A64_LDRSH 0x78a06800 @@ -445,6 +447,8 @@ typedef union { # define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm) # define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12) # define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9) +# define LDAXR(Rt,Rn) o_xx(A64_LDAXR,Rt,Rn) +# define STLXR(Rs,Rt,Rn) oxxx(A64_STLXR,Rs,Rn,Rt) # define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm) # define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12) # define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9) @@ -674,6 +678,11 @@ static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define extr_us(r0,r1) UXTH(r0,r1) # define extr_i(r0,r1) SXTW(r0,r1) # define extr_ui(r0,r1) UXTW(r0,r1) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define movr(r0,r1) _movr(_jit,r0,r1) static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); # define movi(r0,i0) _movi(_jit,r0,i0) @@ -1826,6 +1835,32 @@ 
_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + /* retry: */ + retry = _jit->pc.w; + LDAXR(r0, r1); + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + STLXR(r0, r3, r1); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: */ + CSET(r0, CC_EQ); + done = _jit->pc.w; + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c index e1f6d961..90c87747 100644 --- a/deps/lightning/lib/jit_aarch64-sz.c +++ b/deps/lightning/lib/jit_aarch64-sz.c @@ -404,4 +404,6 @@ 8, /* bswapr_us */ 8, /* bswapr_ui */ 4, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c index f0be046c..dadf76eb 100644 --- a/deps/lightning/lib/jit_aarch64.c +++ b/deps/lightning/lib/jit_aarch64.c @@ -1137,6 +1137,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); case_rrr(movn,); case_rrr(movz,); diff --git a/deps/lightning/lib/jit_alpha-cpu.c b/deps/lightning/lib/jit_alpha-cpu.c index 2dd701d7..3809aa3f 100644 --- a/deps/lightning/lib/jit_alpha-cpu.c +++ b/deps/lightning/lib/jit_alpha-cpu.c @@ -315,6 +315,9 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void 
_movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); # define negr(r0,r1) NEGQ(r1,r0) # define comr(r0,r1) NOT(r1,r0) # define addr(r0,r1,r2) ADDQ(r1,r2,r0) @@ -827,6 +830,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c index ecfeba3b..9653e35e 100644 --- a/deps/lightning/lib/jit_alpha-sz.c +++ b/deps/lightning/lib/jit_alpha-sz.c @@ -404,4 +404,6 @@ 16, /* bswapr_us */ 36, /* bswapr_ui */ 36, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c index d7bb3ecf..1a78b907 100644 --- a/deps/lightning/lib/jit_alpha.c +++ b/deps/lightning/lib/jit_alpha.c @@ -64,6 +64,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_alpha-cpu.c" # include "jit_alpha-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1095,6 +1096,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1503,6 +1512,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_alpha-cpu.c" # include 
"jit_alpha-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index 14ba36bb..91bb17c9 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -36,6 +36,7 @@ # define jit_armv5_p() (jit_cpu.version >= 5) # define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend)) # define jit_armv6_p() (jit_cpu.version >= 6) +# define jit_armv7_p() (jit_cpu.version >= 7) # define jit_armv7r_p() 0 # define stack_framesize 48 extern int __aeabi_idivmod(int, int); @@ -179,7 +180,23 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define ARM_XTR8 0x00000400 /* ?xt? rotate 8 bits */ # define ARM_XTR16 0x00000800 /* ?xt? rotate 16 bits */ # define ARM_XTR24 0x00000c00 /* ?xt? rotate 24 bits */ +# define ARM_LDREX 0x01900090 +# define THUMB2_LDREX 0xe8500000 +# define ARM_STREX 0x01800090 +# define THUMB2_STREX 0xe8400000 /* << ARMv6* */ +/* >> ARMv7 */ +# define ARM_DMB 0xf57ff050 +# define THUMB2_DMB 0xf3bf8f50 +# define DMB_SY 0xf +# define DMB_ST 0xe +# define DMB_ISH 0xb +# define DMB_ISHST 0xa +# define DMB_NSH 0x7 +# define DMB_NSHT 0x6 +# define DMB_OSH 0x3 +# define DMB_OSHST 0x2 +/* << ARMv7 */ # define ARM_SHIFT 0x01a00000 # define ARM_R 0x00000010 /* register shift */ # define ARM_LSL 0x00000000 @@ -399,6 +416,12 @@ static void _tcit(jit_state_t*,unsigned int,int); static void _tpp(jit_state_t*,int,int); # define torl(o,rn,im) _torl(_jit,o,rn,im) static void _torl(jit_state_t*,int,int,int) maybe_unused; +# define DMB(im) dmb(im) +# define T2_DMB(im) tdmb(im) +# define dmb(im) _dmb(_jit, im) +static void _dmb(jit_state_t *_jit, int im); +# define tdmb(im) _tdmb(_jit, im) +static void _tdmb(jit_state_t *_jit, int im); # define CC_MOV(cc,rd,rm) corrr(cc,ARM_MOV,0,rd,rm) # define MOV(rd,rm) CC_MOV(ARM_CC_AL,rd,rm) # define T1_MOV(rd,rm) is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7)) @@ -718,6 +741,9 @@ static void 
_torl(jit_state_t*,int,int,int) maybe_unused; # define CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im) # define LDRDIN(rt,rn,im) CC_LDRDIN(ARM_CC_AL,rt,rn,im) # define T2_LDRDIN(rt,rt2,rn,im) torrri8(THUMB2_LDRDI,rn,rt,rt2,im) +# define CC_LDREX(cc,rt,rn) corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf) +# define LDREX(rt,rn) CC_LDREX(ARM_CC_AL,rt,rn) +# define T2_LDREX(rt,rn,im) torrri8(THUMB2_LDREX,rn,rt,0xf,im) # define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_P,rn,rt,rm) # define STRB(rt,rn,rm) CC_STRB(ARM_CC_AL,rt,rn,rm) # define T1_STRB(rt,rn,rm) is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt)) @@ -771,6 +797,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused; # define CC_STRDIN(cc,rt,rn,im) corri8(cc,ARM_STRDI,rn,rt,im) # define STRDIN(rt,rn,im) CC_STRDIN(ARM_CC_AL,rt,rn,im) # define T2_STRDIN(rt,rt2,rn,im) torrri8(THUMB2_STRDI,rn,rt,rt2,im) +# define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt) +# define STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn) +# define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im) # define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im) # define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im) # define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im) @@ -847,6 +876,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0,r1) _comr(_jit,r0,r1) static void _comr(jit_state_t*,jit_int32_t,jit_int32_t); # define negr(r0,r1) _negr(_jit,r0,r1) @@ -1508,6 +1542,22 @@ _torl(jit_state_t *_jit, int o, int rn, int im) iss(thumb.s[0], thumb.s[1]); } 
+static void +_dmb(jit_state_t *_jit, int im) +{ + assert(!(im & 0xfffffff0)); + ii(ARM_DMB|im); +} + +static void +_tdmb(jit_state_t *_jit, int im) +{ + jit_thumb_t thumb; + assert(!(im & 0xfffffff0)); + thumb.i = THUMB2_DMB | im; + iss(thumb.s[0], thumb.s[1]); +} + static void _nop(jit_state_t *_jit, jit_int32_t i0) { @@ -1610,6 +1660,55 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) _movznr(_jit, ARM_CC_EQ, r0, r1, r2); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if (!jit_armv7_p()) + fallback_casx(r0, r1, r2, r3, i0); + else { + if ((iscasi = (r1 == _NOREG))) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + if (jit_thumb_p()) { + T2_DMB(DMB_ISH); + /* retry: */ + retry = _jit->pc.w; + T2_LDREX(r0, r1, 0); + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + T2_STREX(r0, r3, r1, 0); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: */ + done = _jit->pc.w; + /* r0 = 0 if memory updated, 1 otherwise */ + xori(r0, r0, 1); + T2_DMB(DMB_ISH); + } + else { + DMB(DMB_ISH); + /* retry: */ + retry = _jit->pc.w; + LDREX(r0, r1); + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ + STREX(r0, r3, r1); + jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ + /* done: */ + done = _jit->pc.w; + /* r0 = 0 if memory updated, 1 otherwise */ + xori(r0, r0, 1); + DMB(DMB_ISH); + } + patch_at(arm_patch_jump, jump0, done); + patch_at(arm_patch_jump, jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); + } +} + static void _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c index 293d3069..79970098 100644 --- a/deps/lightning/lib/jit_arm-sz.c +++ b/deps/lightning/lib/jit_arm-sz.c @@ -405,6 +405,8 @@ 8, /* bswapr_us */ 4, /* bswapr_ui */ 0, /* bswapr_ul */ 
+ 0, /* casr */ + 0, /* casi */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ @@ -814,5 +816,7 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index 0fdd1a7a..ae0e9f52 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -90,6 +90,7 @@ extern void __clear_cache(void *, void *); # include "jit_arm-cpu.c" # include "jit_arm-swf.c" # include "jit_arm-vfp.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1504,6 +1505,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _uc); case_rr(ext, _s); case_rr(ext, _us); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); case_rrr(movn,); case_rrr(movz,); @@ -2003,6 +2012,7 @@ _emit_code(jit_state_t *_jit) # include "jit_arm-cpu.c" # include "jit_arm-swf.c" # include "jit_arm-vfp.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_disasm.c b/deps/lightning/lib/jit_disasm.c index 25983a68..856a70bb 100644 --- a/deps/lightning/lib/jit_disasm.c +++ b/deps/lightning/lib/jit_disasm.c @@ -53,6 +53,20 @@ static jit_state_t *disasm_jit; static FILE *disasm_stream; #endif +#if BINUTILS_2_38 +static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...) 
+{ + va_list args; + int r; + + va_start(args, fmt); + r = vprintf(fmt, args); + va_end(args); + + return r; +} +#endif + /* * Implementation */ @@ -77,7 +91,11 @@ jit_init_debug(const char *progname) if (!disasm_stream) disasm_stream = stdout; +#if BINUTILS_2_38 + INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf, fprintf_styled); +#else INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf); +#endif disasm_info.arch = bfd_get_arch(disasm_bfd); disasm_info.mach = bfd_get_mach(disasm_bfd); diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c new file mode 100644 index 00000000..9251947a --- /dev/null +++ b/deps/lightning/lib/jit_fallback.c @@ -0,0 +1,177 @@ +#if PROTO +#define fallback_save(r0) _fallback_save(_jit, r0) +static void _fallback_save(jit_state_t*, jit_int32_t); +#define fallback_load(r0) _fallback_load(_jit, r0) +static void _fallback_load(jit_state_t*, jit_int32_t); +#define fallback_save_regs(r0) _fallback_save_regs(_jit, r0) +static void _fallback_save_regs(jit_state_t*, jit_int32_t); +#define fallback_load_regs(r0) _fallback_load_regs(_jit, r0) +static void _fallback_load_regs(jit_state_t*, jit_int32_t); +#define fallback_calli(i0, i1) _fallback_calli(_jit, i0, i1) +static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t); +#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im) +static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t, + jit_int32_t, jit_int32_t, jit_word_t); +#endif + +#if CODE +static void +_fallback_save(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + spec = _rvs[offset].spec; + regno = jit_regno(spec); + if (regno == r0) { + if (!(spec & jit_class_sav)) + stxi(_jitc->function->regoff[offset], rn(JIT_FP), regno); + break; + } + } +} + +static void +_fallback_load(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; 
offset++) { + spec = _rvs[offset].spec; + regno = jit_regno(spec); + if (regno == r0) { + if (!(spec & jit_class_sav)) + ldxi(regno, rn(JIT_FP), _jitc->function->regoff[offset]); + break; + } + } +} + +static void +_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + regno = JIT_R(offset); + spec = _rvs[regno].spec; + if ((spec & jit_class_gpr) && regno == r0) + continue; + if (!(spec & jit_class_sav)) { + if (!_jitc->function->regoff[regno]) { + _jitc->function->regoff[regno] = + jit_allocai(sizeof(jit_word_t)); + _jitc->again = 1; + } + jit_regset_setbit(&_jitc->regsav, regno); + emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno); + } + } + /* If knew for certain float registers are not used by + * pthread_mutex_lock and pthread_mutex_unlock, could skip this */ + for (offset = 0; offset < JIT_F_NUM; offset++) { + regno = JIT_F(offset); + spec = _rvs[regno].spec; + if (!(spec & jit_class_sav)) { + if (!_jitc->function->regoff[regno]) { + _jitc->function->regoff[regno] = + jit_allocai(sizeof(jit_word_t)); + _jitc->again = 1; + } + jit_regset_setbit(&_jitc->regsav, regno); + emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno); + } + } +} + +static void +_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t offset, regno, spec; + for (offset = 0; offset < JIT_R_NUM; offset++) { + regno = JIT_R(offset); + spec = _rvs[regno].spec; + if ((spec & jit_class_gpr) && regno == r0) + continue; + if (!(spec & jit_class_sav)) { + jit_regset_clrbit(&_jitc->regsav, regno); + emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]); + } + } + /* If knew for certain float registers are not used by + * pthread_mutex_lock and pthread_mutex_unlock, could skip this */ + for (offset = 0; offset < JIT_F_NUM; offset++) { + regno = JIT_F(offset); + spec = _rvs[regno].spec; + if (!(spec & jit_class_sav)) { + jit_regset_clrbit(&_jitc->regsav, regno); + 
emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]); + } + } +} + +static void +_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) +{ +# if defined(__mips__) + movi(rn(_A0), i1); +# elif defined(__arm__) + movi(rn(_R0), i1); +# elif defined(__sparc__) + movi(rn(_O0), i1); +# elif defined(__ia64__) + /* avoid confusion with pushargi patching */ + if (i1 >= -2097152 && i1 <= 2097151) + MOVI(_jitc->rout, i1); + else + MOVL(_jitc->rout, i1); +# elif defined(__hppa__) + movi(_R26_REGNO, i1); +# elif defined(__s390__) || defined(__s390x__) + movi(rn(_R2), i1); +# elif defined(__alpha__) + movi(rn(_A0), i1); +# elif defined(__riscv__) + movi(rn(JIT_RA0), i1); +# endif + calli(i0); +} + +static void +_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t r1_reg, iscasi; + jit_word_t jump, done; + /* XXX only attempts to fallback cas for lightning jit code */ + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + if ((iscasi = r1 == _NOREG)) { + r1_reg = jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + fallback_save_regs(r0); + fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&mutex); + fallback_load(r1); + ldr(r0, r1); + fallback_load(r2); + eqr(r0, r0, r2); + fallback_save(r0); + jump = bnei(_jit->pc.w, r0, 1); + fallback_load(r3); +# if __WORDSIZE == 32 + str_i(r1, r3); +# else + str_l(r1, r3); +# endif + /* done: */ + done = _jit->pc.w; + fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex); + fallback_load(r0); +# if defined(__arm__) + patch_at(arm_patch_jump, jump, done); +# else + patch_at(jump, done); +# endif + fallback_load_regs(r0); + if (iscasi) + jit_unget_reg(r1_reg); +} +#endif diff --git a/deps/lightning/lib/jit_hppa-cpu.c b/deps/lightning/lib/jit_hppa-cpu.c index 6ca54f36..155ec91f 100644 --- a/deps/lightning/lib/jit_hppa-cpu.c +++ b/deps/lightning/lib/jit_hppa-cpu.c @@ -652,6 +652,11 @@ static jit_word_t 
_movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) #define comr(r0,r1) UADDCM(_R0_REGNO,r1,r0) #define negr(r0,r1) SUB(_R0_REGNO,r1,r0) #define extr_c(r0,r1) EXTRWR(r1,31,8,r0) @@ -1651,6 +1656,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c index 1bfb7e63..e984bacd 100644 --- a/deps/lightning/lib/jit_hppa-sz.c +++ b/deps/lightning/lib/jit_hppa-sz.c @@ -404,4 +404,6 @@ 36, /* bswapr_us */ 80, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c index 26688429..b994571d 100644 --- a/deps/lightning/lib/jit_hppa.c +++ b/deps/lightning/lib/jit_hppa.c @@ -25,6 +25,7 @@ #define PROTO 1 # include "jit_hppa-cpu.c" # include "jit_hppa-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1028,6 +1029,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rrr(movn,); case_rrr(movz,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rr(mov,); case jit_code_movi: 
if (node->flag & jit_flag_node) { @@ -1459,6 +1468,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_hppa-cpu.c" # include "jit_hppa-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c index 63bb92db..b28e8f1a 100644 --- a/deps/lightning/lib/jit_ia64-cpu.c +++ b/deps/lightning/lib/jit_ia64-cpu.c @@ -1311,6 +1311,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); # define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) @@ -3499,6 +3504,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c index c81b3ea6..020349d8 100644 --- a/deps/lightning/lib/jit_ia64-sz.c +++ b/deps/lightning/lib/jit_ia64-sz.c @@ -404,4 +404,6 @@ 48, /* bswapr_us */ 48, /* bswapr_ui */ 16, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c index 8b4cd004..5664762f 100644 --- a/deps/lightning/lib/jit_ia64.c +++ b/deps/lightning/lib/jit_ia64.c @@ -52,6 +52,7 @@ 
extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_ia64-cpu.c" # include "jit_ia64-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1175,6 +1176,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1693,6 +1702,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_ia64-cpu.c" # include "jit_ia64-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c index 06255891..08625923 100644 --- a/deps/lightning/lib/jit_mips-cpu.c +++ b/deps/lightning/lib/jit_mips-cpu.c @@ -522,6 +522,11 @@ static void _movi(jit_state_t*,jit_int32_t,jit_word_t); static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define movnr(r0,r1,r2) MOVN(r0, r1, r2) # define movzr(r0,r1,r2) MOVZ(r0, r1, r2) +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define ldr_c(r0,r1) LB(r0,0,r1) # define ldi_c(r0,i0) _ldi_c(_jit,r0,i0) static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t); @@ -1328,6 +1333,13 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { @@ -2931,6 +2943,32 @@ _callr(jit_state_t *_jit, jit_int32_t r0) static void _calli(jit_state_t *_jit, jit_word_t i0) { + if (((_jit->pc.w + 
sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) { + if (can_sign_extend_short_p(i0)) { + JAL((i0 & ~0xf0000000) >> 2); + addiu(_T9_REGNO, _ZERO_REGNO, i0); + return; + } + + if (can_zero_extend_short_p(i0)) { + JAL((i0 & ~0xf0000000) >> 2); + ORI(_T9_REGNO, _ZERO_REGNO, i0); + return; + } + + if (can_sign_extend_int_p(i0)) { + if (i0 & 0xffff) { + LUI(_T9_REGNO, i0 >> 16); + JAL((i0 & ~0xf0000000) >> 2); + ORI(_T9_REGNO, _T9_REGNO, i0); + } else { + JAL((i0 & ~0xf0000000) >> 2); + LUI(_T9_REGNO, i0 >> 16); + } + return; + } + } + movi(_T9_REGNO, i0); JALR(_T9_REGNO); NOP(1); diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index b4642fa9..25f0712f 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -405,6 +405,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -814,6 +816,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* NEW_ABI */ #endif /* __WORDSIZE */ @@ -1222,4 +1226,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index 94fe797c..ecf025d0 100644 --- a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -67,6 +67,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1073,8 +1074,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) jit_inc_synth_w(finishr, r0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; - jit_movr(_T9, r0); - call = jit_callr(_T9); + call = jit_callr(r0); call->v.w = _jitc->function->self.argi; #if NEW_ABI call->w.w = call->v.w; @@ -1433,6 +1433,14 @@ 
_emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1875,6 +1883,7 @@ _emit_code(jit_state_t *_jit) # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c index ebd3d56f..664adff8 100644 --- a/deps/lightning/lib/jit_names.c +++ b/deps/lightning/lib/jit_names.c @@ -230,4 +230,5 @@ static char *code_name[] = { "movr_d_w", "movi_d_w", "bswapr_us", "bswapr_ui", "bswapr_ul", + "casr", "casi", }; diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index cab085fd..ef47f9af 100644 --- a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -260,7 +260,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LHAU(d,a,s) FDs(43,d,a,s) # define LHAUX(d,a,b) FX(31,d,a,b,375) # define LHAX(d,a,b) FX(31,d,a,b,343) -# define LHRBX(d,a,b) FX(31,d,a,b,790) +# define LHBRX(d,a,b) FX(31,d,a,b,790) # define LHZ(d,a,s) FDs(40,d,a,s) # define LHZU(d,a,s) FDs(41,d,a,s) # define LHZUX(d,a,b) FX(31,d,a,b,311) @@ -271,6 +271,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define LSWI(d,a,n) FX(31,d,a,n,597) # define LSWX(d,a,b) FX(31,d,a,b,533) # define LWARX(d,a,b) FX(31,d,a,b,20) +# define LDARX(d,a,b) FX(31,d,a,b,84) # define LWBRX(d,a,b) FX(31,d,a,b,534) # define LWA(d,a,s) FDs(58,d,a,s|2) # define LWAUX(d,a,b) FX(31,d,a,b,373) @@ -446,6 +447,7 @@ static void _MCRXR(jit_state_t*, jit_int32_t); # define STW(s,a,d) FDs(36,s,a,d) # define STWBRX(s,a,b) FX(31,s,a,b,662) # define STWCX_(s,a,b) FX_(31,s,a,b,150) +# define STDCX_(s,a,b) FX_(31,s,a,b,214) # define STWU(s,a,d) FDs(37,s,a,d) # 
define STWUX(s,a,b) FX(31,s,a,b,183) # define STWX(s,a,b) FX(31,s,a,b,151) @@ -511,6 +513,11 @@ static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) NOT(r0,r1) # define extr_c(r0,r1) EXTSB(r0,r1) @@ -521,10 +528,12 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); # define extr_i(r0,r1) EXTSW(r0,r1) # define extr_ui(r0,r1) CLRLDI(r0,r1,32) # endif -# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1) -static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); -# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1) -static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_us_lh(r0,r1,no_flag) _bswapr_us(_jit,r0,r1,no_flag) +# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1,0) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); +# define bswapr_ui_lw(r0,r1,no_flag) _bswapr_ui(_jit,r0,r1,no_flag) +# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1,0) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t); # if __WORDSIZE == 64 # define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1) # endif @@ -1148,8 +1157,70 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) } static void -_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) { + jit_int32_t r1_reg, iscasi; + jit_word_t retry, done, jump0, jump1; + if ((iscasi = (r1 == _NOREG))) { + r1_reg = 
jit_get_reg(jit_class_gpr); + r1 = rn(r1_reg); + movi(r1, i0); + } + SYNC(); + /* retry: */ + retry = _jit->pc.w; +# if __WORDSIZE == 32 + LWARX(r0, _R0_REGNO, r1); +# else + LDARX(r0, _R0_REGNO, r1); +# endif + jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */ +# if __WORDSIZE == 32 + STWCX_(r3, _R0_REGNO, r1); +# else + STDCX_(r3, _R0_REGNO, r1); +# endif + jump1 = bnei(_jit->pc.w, r0, 0); /* bne retry r0 0 */ + /* done: */ + done = _jit->pc.w; + ISYNC(); + MFCR(r0); + patch_at(jump0, done); + patch_at(jump1, retry); + if (iscasi) + jit_unget_reg(r1_reg); +} + +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) +{ + jit_int32_t reg, addr_reg; + + /* Convert load followed by bswap to a single instruction */ + /* FIXME r0 and r1 do not need to be the same, only must check if + * r1 was loaded in previous instruction */ + if (no_flag && r0 == r1) { + if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e | r0 << 21)) { + /* Convert LHZX to LHBRX */ + _jit->pc.ui--; + LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f); + return; + } + + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 | r0 << 21)) { + /* Convert LHZ to LHBRX */ + _jit->pc.ui--; + addr_reg = (*_jit->pc.ui >> 16) & 0x1f; + + reg = jit_get_reg(jit_class_gpr); + LI(rn(reg), (short)*_jit->pc.ui); + LHBRX(r0, rn(reg), addr_reg); + jit_unget_reg(reg); + return; + } + } + if (r0 == r1) { RLWIMI(r0, r0, 16, 8, 15); RLWINM(r0, r0, 24, 16, 31); @@ -1160,9 +1231,34 @@ _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } static void -_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) { - jit_int32_t reg; + jit_int32_t reg, addr_reg; + + /* Convert load followed by bswap to a single instruction */ + /* FIXME r0 and r1 do not need to be the same, only must check if + * r1 was loaded in previous instruction */ + if (no_flag && r0 == r1) { + if 
((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e | r0 << 21)) { + /* Convert LWZX to LWBRX */ + _jit->pc.ui--; + LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f); + return; + } + + if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 | r0 << 21)) { + /* Convert LWZ to LWBRX */ + _jit->pc.ui--; + addr_reg = (*_jit->pc.ui >> 16) & 0x1f; + + reg = jit_get_reg(jit_class_gpr); + LI(rn(reg), (short)*_jit->pc.ui); + LWBRX(r0, rn(reg), addr_reg); + jit_unget_reg(reg); + return; + } + } + reg = jit_get_reg(jit_class_gpr); ROTLWI(rn(reg), r1, 8); RLWIMI(rn(reg), r1, 24, 0, 7); @@ -1428,15 +1524,23 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) jit_unget_reg(reg); } +# define is_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0) + static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; + jit_int32_t reg, offt; if (can_zero_extend_short_p(i0)) ANDI_(r0, r1, i0); else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff)) ANDIS_(r0, r1, (jit_uword_t)i0 >> 16); - else { + else if (__WORDSIZE == 32 && is_mask(i0)) { + offt = __builtin_ctzl(i0); + RLWINM(r0, r1, 0, 32 - offt - __builtin_popcountl(i0), 31 - offt); + } else if (__WORDSIZE == 32 && is_mask(~i0)) { + offt = __builtin_ctzl(~i0); + RLWINM(r0, r1, 0, 32 - offt, 31 - offt - __builtin_popcountl(~i0)); + } else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); AND(r0, r1, rn(reg)); @@ -3204,10 +3308,13 @@ _calli(jit_state_t *_jit, jit_word_t i0 { # if _CALL_SYSV jit_word_t d; - d = (i0 - _jit->pc.w) & ~3; - if (can_sign_extend_jump_p(d)) - BL(d); - else + d = (i0 - _jit->pc.w - !!varargs * 4) & ~3; + if (can_sign_extend_jump_p(d)) { + /* Tell double arguments were passed in registers. 
*/ + if (varargs) + CREQV(6, 6, 6); + BL(d); + } else # endif { movi(_R12_REGNO, i0); diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c index 387cc6fd..18cc621a 100644 --- a/deps/lightning/lib/jit_ppc-fpu.c +++ b/deps/lightning/lib/jit_ppc-fpu.c @@ -143,8 +143,17 @@ static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t); # define absr_d(r0,r1) FABS(r0,r1) # define negr_f(r0,r1) negr_d(r0,r1) # define negr_d(r0,r1) FNEG(r0,r1) -# define sqrtr_f(r0,r1) FSQRTS(r0,r1) -# define sqrtr_d(r0,r1) FSQRT(r0,r1) +# ifdef _ARCH_PPCSQ +# define sqrtr_f(r0,r1) FSQRTS(r0,r1) +# define sqrtr_d(r0,r1) FSQRT(r0,r1) +# else +extern float sqrtf(float); +# define sqrtr_f(r0,r1) _sqrtr_f(_jit,r0,r1) +static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t); +extern double sqrt(double); +# define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1) +static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t); +# endif # define addr_f(r0,r1,r2) FADDS(r0,r1,r2) # define addr_d(r0,r1,r2) FADD(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) @@ -484,23 +493,40 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) ldi_d(r0, (jit_word_t)i0); } -/* should only work on newer ppc (fcfid is a ppc64 instruction) */ static void _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { # if __WORDSIZE == 32 - jit_int32_t reg; + jit_int32_t reg, freg, off1, off2; + +# if __BYTE_ORDER == __BIG_ENDIAN + off1 = alloca_offset - 8; + off2 = alloca_offset - 4; +# else + off1 = alloca_offset - 4; + off2 = alloca_offset - 8; +# endif + reg = jit_get_reg(jit_class_gpr); - rshi(rn(reg), r1, 31); - /* use reserved 8 bytes area */ - stxi(alloca_offset - 4, _FP_REGNO, r1); - stxi(alloca_offset - 8, _FP_REGNO, rn(reg)); + freg = jit_get_reg(jit_class_fpr); + + movi(rn(reg), 0x43300000); + stxi_i(off1, _FP_REGNO, rn(reg)); + movi(rn(reg), 0x80000000); + stxi_i(off2, _FP_REGNO, rn(reg)); + ldxi_d(rn(freg), _FP_REGNO, alloca_offset - 8); + xorr(rn(reg), r1, rn(reg)); + 
stxi_i(off2, _FP_REGNO, rn(reg)); + ldxi_d(r0, _FP_REGNO, alloca_offset - 8); + subr_d(r0, r0, rn(freg)); + jit_unget_reg(reg); + jit_unget_reg(freg); # else stxi(alloca_offset - 8, _FP_REGNO, r1); -# endif ldxi_d(r0, _FP_REGNO, alloca_offset - 8); FCFID(r0, r0); +# endif } static void @@ -533,6 +559,32 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } # endif +# ifndef _ARCH_PPCSQ +static void +_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr_f(rn(JIT_FA0), r1); + calli((jit_word_t)sqrtf +# if _CALL_SYSV + , 0 +# endif + ); + movr_f(r0, rn(JIT_FRET)); +} + +static void +_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + movr_d(rn(JIT_FA0), r1); + calli((jit_word_t)sqrt +# if _CALL_SYSV + , 0 +# endif + ); + movr_d(r0, rn(JIT_FRET)); +} +# endif + # define fpr_opi(name, type, size) \ static void \ _##name##i_##type(jit_state_t *_jit, \ diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c index 0be7047b..9cd006cd 100644 --- a/deps/lightning/lib/jit_ppc-sz.c +++ b/deps/lightning/lib/jit_ppc-sz.c @@ -406,6 +406,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* _CALL_SYV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ @@ -819,6 +821,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* _CALL_AIX */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ @@ -1231,6 +1235,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 44, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __BYTEORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1642,6 +1648,8 @@ 20, /* bswapr_us */ 16, /* bswapr_ui */ 44, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c index e94d1a5e..fd6964e2 100644 --- a/deps/lightning/lib/jit_ppc.c +++ 
b/deps/lightning/lib/jit_ppc.c @@ -1148,6 +1148,8 @@ _emit_code(jit_state_t *_jit) jit_word_t word; jit_int32_t value; jit_int32_t offset; + jit_bool_t no_flag = 0; /* Set if previous instruction is + * *not* a jump target. */ struct { jit_node_t *node; jit_word_t word; @@ -1356,13 +1358,25 @@ _emit_code(jit_state_t *_jit) # if __WORDSIZE == 64 case_rr(hton, _ul); # endif - case_rr(bswap, _us); - case_rr(bswap, _ui); + case jit_code_bswapr_us: + bswapr_us_lh(rn(node->u.w), rn(node->v.w), no_flag); + break; + case jit_code_bswapr_ui: + bswapr_ui_lw(rn(node->u.w), rn(node->v.w), no_flag); + break; # if __WORDSIZE == 64 case_rr(bswap, _ul); # endif case_rr(neg,); case_rr(com,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1683,7 +1697,7 @@ _emit_code(jit_state_t *_jit) } } else - (void)jmpi_p(node->u.w); + jmpi(node->u.w); break; case jit_code_callr: callr(rn(node->u.w) @@ -1823,6 +1837,8 @@ _emit_code(jit_state_t *_jit) assert(_jitc->regarg == 0 && _jitc->synth == 0); /* update register live state */ jit_reglive(node); + + no_flag = !(node->flag & jit_flag_patch); } #undef case_brf #undef case_brw diff --git a/deps/lightning/lib/jit_print.c b/deps/lightning/lib/jit_print.c index 61d9650c..ee37b025 100644 --- a/deps/lightning/lib/jit_print.c +++ b/deps/lightning/lib/jit_print.c @@ -58,7 +58,7 @@ void jit_init_print(void) { if (!print_stream) - print_stream = stderr; + print_stream = stdout; } void @@ -107,7 +107,7 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) (jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp| jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg| jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg| - jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl); + 
jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl|jit_cc_a2_rlh); if (!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) || node->code == jit_code_finishr || node->code == jit_code_finishi)) @@ -217,6 +217,18 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) print_chr(' '); print_reg(node->u.q.h); print_str(") "); print_reg(node->v.w); print_chr(' '); print_hex(node->w.w); return; + r_r_q: + print_chr(' '); print_reg(node->u.w); + print_chr(' '); print_reg(node->v.w); + print_str(" ("); print_reg(node->w.q.l); + print_chr(' '); print_reg(node->w.q.h); + print_str(") "); return; + r_w_q: + print_chr(' '); print_reg(node->u.w); + print_chr(' '); print_hex(node->v.w); + print_str(" ("); print_reg(node->w.q.l); + print_chr(' '); print_reg(node->w.q.h); + print_str(") "); return; r_r_f: print_chr(' '); print_reg(node->u.w); print_chr(' '); print_reg(node->v.w); @@ -357,6 +369,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node) case jit_cc_a0_reg|jit_cc_a0_rlh| jit_cc_a1_reg|jit_cc_a2_int: goto q_r_w; + case jit_cc_a0_reg|jit_cc_a1_reg| + jit_cc_a2_reg|jit_cc_a2_rlh: + goto r_r_q; + case jit_cc_a0_reg|jit_cc_a1_int| + jit_cc_a2_reg|jit_cc_a2_rlh: + goto r_w_q; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_flt: goto r_r_f; case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_dbl: diff --git a/deps/lightning/lib/jit_riscv-cpu.c b/deps/lightning/lib/jit_riscv-cpu.c index 9f029c03..5046fac6 100644 --- a/deps/lightning/lib/jit_riscv-cpu.c +++ b/deps/lightning/lib/jit_riscv-cpu.c @@ -456,6 +456,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); + define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) 
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define ltr(r0, r1, r2) SLT(r0, r1, r2) # define lti(r0, r1, im) _lti(_jit, r0, r1, im) static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -1339,6 +1344,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c index c8908d88..ea2911fe 100644 --- a/deps/lightning/lib/jit_riscv-sz.c +++ b/deps/lightning/lib/jit_riscv-sz.c @@ -403,4 +403,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index 1dc3c9ec..966604a0 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -34,6 +34,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1134,6 +1135,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _us); case_rr(ext, _i); case_rr(ext, _ui); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1558,6 +1567,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_s390-cpu.c b/deps/lightning/lib/jit_s390-cpu.c index 619ab152..2c107877 100644 --- a/deps/lightning/lib/jit_s390-cpu.c +++ 
b/deps/lightning/lib/jit_s390-cpu.c @@ -973,6 +973,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -2468,6 +2473,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c index bb9071d1..cea2d444 100644 --- a/deps/lightning/lib/jit_s390-sz.c +++ b/deps/lightning/lib/jit_s390-sz.c @@ -404,6 +404,8 @@ 52, /* bswapr_us */ 128, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -811,4 +813,6 @@ 68, /* bswapr_us */ 160, /* bswapr_ui */ 344, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c index 4b89bea0..ef0c8998 100644 --- a/deps/lightning/lib/jit_s390.c +++ b/deps/lightning/lib/jit_s390.c @@ -88,6 +88,7 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1165,6 +1166,14 @@ 
_emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1558,6 +1567,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c index 90c3767b..ecea5066 100644 --- a/deps/lightning/lib/jit_sparc-cpu.c +++ b/deps/lightning/lib/jit_sparc-cpu.c @@ -552,6 +552,11 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) # define addr(r0, r1, r2) ADD(r1, r2, r0) @@ -1233,6 +1238,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + fallback_casx(r0, r1, r2, r3, i0); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c index 5ec051d9..5e7ef95f 100644 --- a/deps/lightning/lib/jit_sparc-sz.c +++ b/deps/lightning/lib/jit_sparc-sz.c @@ -403,6 +403,8 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 0, /* 
bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 @@ -810,4 +812,6 @@ 20, /* bswapr_us */ 52, /* bswapr_ui */ 116, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c index 23d44425..a677998f 100644 --- a/deps/lightning/lib/jit_sparc.c +++ b/deps/lightning/lib/jit_sparc.c @@ -40,6 +40,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -1477,6 +1478,14 @@ _emit_code(jit_state_t *_jit) case_rr(ext, _i); case_rr(ext, _ui); #endif + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); @@ -1875,6 +1884,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 81534f08..0d8affe8 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -369,6 +369,11 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t); static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t); # define movsr_u(r0, r1) _movsr_u(_jit, r0, r1) static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t); +# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) +static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_word_t); +#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0) +#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) #define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); #define movzr(r0, r1, r2) 
_movzr(_jit, r0, r1, r2) @@ -2218,6 +2223,66 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) mrm(0x03, r7(r0), r7(r1)); } +static void +_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, + jit_int32_t r2, jit_int32_t r3, jit_word_t i0) +{ + jit_int32_t save_rax, restore_rax; + jit_int32_t ascasr_reg, ascasr_use; + if (r0 != _RAX_REGNO) { /* result not in %rax */ + if (r2 != _RAX_REGNO) { /* old value not in %rax */ + save_rax = jit_get_reg(jit_class_gpr); + movr(rn(save_rax), _RAX_REGNO); + restore_rax = 1; + } + else + restore_rax = 0; + } + else + restore_rax = 0; + if (r2 != _RAX_REGNO) + movr(_RAX_REGNO, r2); + if (r1 == _NOREG) { /* using immediate address */ + if (!can_sign_extend_int_p(i0)) { + ascasr_reg = jit_get_reg(jit_class_gpr); + if (ascasr_reg == _RAX) { + ascasr_reg = jit_get_reg(jit_class_gpr); + jit_unget_reg(_RAX); + } + ascasr_use = 1; + movi(rn(ascasr_reg), i0); + } + else + ascasr_use = 0; + } + else + ascasr_use = 0; + ic(0xf0); /* lock */ + if (ascasr_use) + rex(0, WIDE, r3, _NOREG, rn(ascasr_reg)); + else + rex(0, WIDE, r3, _NOREG, r1); + ic(0x0f); + ic(0xb1); + if (r1 != _NOREG) /* casr */ + rx(r3, 0, r1, _NOREG, _SCL1); + else { /* casi */ + if (ascasr_use) + rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */ + else + rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */ + } + cc(X86_CC_E, r0); + if (r0 != _RAX_REGNO) + movr(r0, _RAX_REGNO); + if (restore_rax) { + movr(_RAX_REGNO, rn(save_rax)); + jit_unget_reg(save_rax); + } + if (ascasr_use) + jit_unget_reg(ascasr_reg); +} + static void _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index bd4b9a08..ff7548a1 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -404,6 +404,8 @@ 7, /* bswapr_us */ 4, /* bswapr_ui */ 0, /* bswapr_ul */ + 9, /* casr */ + 0, /* casi */ #endif #if __X64 @@ -812,6 +814,8 @@ 9, /* 
bswapr_us */ 6, /* bswapr_ui */ 6, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #else # if __X64_32 @@ -1219,6 +1223,8 @@ 9, /* bswapr_us */ 6, /* bswapr_ui */ 0, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ # else #define JIT_INSTR_MAX 115 @@ -1625,6 +1631,8 @@ 9, /* bswapr_us */ 6, /* bswapr_ui */ 6, /* bswapr_ul */ + 0, /* casr */ + 0, /* casi */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index e3e13834..fb0b06ba 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1674,6 +1674,14 @@ _emit_code(jit_state_t *_jit) case_rrw(gt, _u); case_rrr(ne,); case_rrw(ne,); + case jit_code_casr: + casr(rn(node->u.w), rn(node->v.w), + rn(node->w.q.l), rn(node->w.q.h)); + break; + case jit_code_casi: + casi(rn(node->u.w), node->v.w, + rn(node->w.q.l), rn(node->w.q.h)); + break; case_rrr(movn,); case_rrr(movz,); case_rr(mov,); diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index b78bd07c..e7ce3832 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -105,7 +105,7 @@ static jit_bool_t _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node); #define redundant_store(node, jump) _redundant_store(_jit, node, jump) -static void +static jit_bool_t _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump); #define simplify_movr(p, n, k, s) _simplify_movr(_jit, p, n, k, s) @@ -131,7 +131,7 @@ static void _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno); #define simplify() _simplify(_jit) -static void +static jit_bool_t _simplify(jit_state_t *_jit); #define jit_reg_undef -1 @@ -1138,6 +1138,20 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code, return (link_node(node)); } +jit_node_t * +_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code, + jit_word_t u, jit_word_t v, + jit_int32_t l, jit_int32_t h) +{ + jit_node_t *node = 
new_node(code); + assert(!_jitc->realize); + node->u.w = u; + node->v.w = v; + node->w.q.l = l; + node->w.q.h = h; + return (link_node(node)); +} + jit_node_t * _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code, jit_word_t u, jit_word_t v, jit_float32_t w) @@ -1539,6 +1553,14 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_movnr: case jit_code_movzr: mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg; break; + case jit_code_casr: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg| + jit_cc_a2_reg|jit_cc_a2_rlh; + break; + case jit_code_casi: + mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int| + jit_cc_a2_reg|jit_cc_a2_rlh; + break; default: abort(); } @@ -1604,6 +1626,7 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label) void _jit_optimize(jit_state_t *_jit) { + jit_int32_t pass; jit_bool_t jump; jit_bool_t todo; jit_int32_t mask; @@ -1617,6 +1640,9 @@ _jit_optimize(jit_state_t *_jit) sequential_labels(); split_branches(); + pass = 0; + +second_pass: /* create initial mapping of live register values * at the start of a basic block */ for (offset = 0; offset < _jitc->blocks.offset; offset++) { @@ -1640,28 +1666,58 @@ _jit_optimize(jit_state_t *_jit) } } while (todo); - patch_registers(); - simplify(); + if (pass == 0) { + todo = 0; - /* figure out labels that are only reached with a jump - * and is required to do a simple redundant_store removal - * on jit_beqi below */ - jump = 1; - for (node = _jitc->head; node; node = node->next) { - switch (node->code) { - case jit_code_label: - if (!jump) - node->flag |= jit_flag_head; - break; - case jit_code_jmpi: case jit_code_jmpr: - case jit_code_epilog: - jump = 1; - break; - case jit_code_data: case jit_code_note: - break; - default: - jump = 0; - break; + patch_registers(); + if (simplify()) + todo = 1; + + /* figure out labels that are only reached with a jump + * and is required to do a simple redundant_store removal + * on jit_beqi below */ + jump = 1; + for 
(node = _jitc->head; node; node = node->next) { + switch (node->code) { + case jit_code_label: + if (!jump) + node->flag |= jit_flag_head; + break; + case jit_code_jmpi: case jit_code_jmpr: + case jit_code_epilog: + jump = 1; + break; + case jit_code_data: case jit_code_note: + break; + default: + jump = 0; + break; + } + } + + for (node = _jitc->head; node; node = node->next) { + mask = jit_classify(node->code); + if (mask & jit_cc_a0_reg) + node->u.w &= ~jit_regno_patch; + if (mask & jit_cc_a1_reg) + node->v.w &= ~jit_regno_patch; + if (mask & jit_cc_a2_reg) + node->w.w &= ~jit_regno_patch; + if (node->code == jit_code_beqi) { + if (redundant_store(node, 1)) + todo = 1; + } + else if (node->code == jit_code_bnei) { + if (redundant_store(node, 0)) + todo = 1; + } + } + + /* If instructions were removed, must recompute state at + * start of blocks. */ + if (todo) { + pass = 1; + goto second_pass; } } @@ -1673,69 +1729,59 @@ _jit_optimize(jit_state_t *_jit) node->v.w &= ~jit_regno_patch; if (mask & jit_cc_a2_reg) node->w.w &= ~jit_regno_patch; - switch (node->code) { - case jit_code_prolog: - _jitc->function = _jitc->functions.ptr + node->w.w; - break; - case jit_code_epilog: - _jitc->function = NULL; - break; - case jit_code_beqi: - redundant_store(node, 1); - break; - case jit_code_bnei: - redundant_store(node, 0); - break; - default: + if (node->code == jit_code_prolog) + _jitc->function = _jitc->functions.ptr + node->w.w; + else if(node->code == jit_code_epilog) + _jitc->function = NULL; + else { #if JIT_HASH_CONSTS - if (mask & jit_cc_a0_flt) { - node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a0_dbl) { - node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a1_flt) { - node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask 
& jit_cc_a1_dbl) { - node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a2_flt) { - node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4); - node->flag |= jit_flag_node | jit_flag_data; - } - else if (mask & jit_cc_a2_dbl) { - node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8); - node->flag |= jit_flag_node | jit_flag_data; - } + if (mask & jit_cc_a0_flt) { + node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a0_dbl) { + node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a1_flt) { + node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a1_dbl) { + node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a2_flt) { + node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4); + node->flag |= jit_flag_node | jit_flag_data; + } + else if (mask & jit_cc_a2_dbl) { + node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8); + node->flag |= jit_flag_node | jit_flag_data; + } #endif - if (_jitc->function) { - if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) == - (jit_cc_a0_reg|jit_cc_a0_chg)) { - if (mask & jit_cc_a0_rlh) { - jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->u.q.l)); - jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->u.q.h)); - } - else - jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->u.w)); - } - if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) == - (jit_cc_a1_reg|jit_cc_a1_chg)) + if (_jitc->function) { + if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) == + (jit_cc_a0_reg|jit_cc_a0_chg)) { + if (mask & jit_cc_a0_rlh) { + jit_regset_setbit(&_jitc->function->regset, + jit_regno(node->u.q.l)); 
jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->v.w)); - if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) == - (jit_cc_a2_reg|jit_cc_a2_chg)) + jit_regno(node->u.q.h)); + } + else jit_regset_setbit(&_jitc->function->regset, - jit_regno(node->w.w)); + jit_regno(node->u.w)); } - break; + if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) == + (jit_cc_a1_reg|jit_cc_a1_chg)) + jit_regset_setbit(&_jitc->function->regset, + jit_regno(node->v.w)); + if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) == + (jit_cc_a2_reg|jit_cc_a2_chg)) + jit_regset_setbit(&_jitc->function->regset, + jit_regno(node->w.w)); + } } } } @@ -1806,13 +1852,24 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) else jit_regset_setbit(&_jitc->reglive, node->v.w); } - if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) { - if (value & jit_cc_a2_chg) { - jit_regset_clrbit(&_jitc->reglive, node->w.w); - jit_regset_setbit(&_jitc->regmask, node->w.w); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + /* Assume registers are not changed */ + if (!(node->w.q.l & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->w.q.l); + if (!(node->w.q.h & jit_regno_patch)) + jit_regset_setbit(&_jitc->reglive, node->w.q.h); + } + else { + if (!(node->w.w & jit_regno_patch)) { + if (value & jit_cc_a2_chg) { + jit_regset_clrbit(&_jitc->reglive, node->w.w); + jit_regset_setbit(&_jitc->regmask, node->w.w); + } + else + jit_regset_setbit(&_jitc->reglive, node->w.w); + } } - else - jit_regset_setbit(&_jitc->reglive, node->w.w); } if (jit_regset_set_p(&_jitc->regmask)) { jit_update(node->next, &_jitc->reglive, &_jitc->regmask); @@ -1843,8 +1900,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) } if (value & jit_cc_a1_reg) jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w)); - if (value & jit_cc_a2_reg) - jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + jit_regset_setbit(&_jitc->regarg, 
jit_regno(node->w.q.l)); + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.h)); + } + else + jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); + } } void @@ -1863,8 +1926,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) } if (value & jit_cc_a1_reg) jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w)); - if (value & jit_cc_a2_reg) - jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w)); + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l)); + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.h)); + } + else + jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w)); + } } void @@ -2302,11 +2371,26 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(®mask, node->w.w)) { - jit_regset_clrbit(®mask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(®live, node->w.w); + if (value & jit_cc_a2_rlh) { + if (!(node->w.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(®mask, node->w.q.l)) + jit_regset_clrbit(®mask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->w.q.h)) + jit_regset_clrbit(®mask, node->w.q.h); + } + } + else { + if (value & jit_cc_a2_reg) { + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(®mask, node->w.w)) { + jit_regset_clrbit(®mask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(®live, node->w.w); + } + } } } } @@ -2374,19 +2458,19 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo) * means that only JIT_Vn registers can be trusted on * arrival of jmpr. 
*/ + jit_regset_set_ui(®mask, 0); for (regno = 0; regno < _jitc->reglen; regno++) { spec = jit_class(_rvs[regno].spec); - if (jit_regset_tstbit(®mask, regno) && - (spec & (jit_class_gpr|jit_class_fpr)) && - !(spec & jit_class_sav)) - jit_regset_clrbit(®mask, regno); + if ((spec & (jit_class_gpr|jit_class_fpr)) && + (spec & jit_class_sav)) + jit_regset_setbit(®mask, regno); } /* Assume non callee save registers are live due * to jump to unknown location. */ /* Treat all callee save as live. */ - jit_regset_ior(®live, ®live, ®mask); + jit_regset_ior(&block->reglive, ®live, ®mask); /* Treat anything else as dead. */ - jit_regset_set_ui(®mask, 0); + return; } } break; @@ -2453,11 +2537,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, default: value = jit_classify(node->code); if (value & jit_cc_a2_reg) { - if (!(node->w.w & jit_regno_patch)) { - if (jit_regset_tstbit(mask, node->w.w)) { - jit_regset_clrbit(mask, node->w.w); - if (!(value & jit_cc_a2_chg)) - jit_regset_setbit(live, node->w.w); + if (value & jit_cc_a2_rlh) { + if (!(node->w.q.l & jit_regno_patch)) { + /* Assume register is not changed */ + if (jit_regset_tstbit(mask, node->w.q.l)) + jit_regset_clrbit(mask, node->w.q.l); + } + if (!(node->w.q.h & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->w.q.h)) + jit_regset_clrbit(mask, node->w.q.h); + } + } + else { + if (!(node->w.w & jit_regno_patch)) { + if (jit_regset_tstbit(mask, node->w.w)) { + jit_regset_clrbit(mask, node->w.w); + if (!(value & jit_cc_a2_chg)) + jit_regset_setbit(live, node->w.w); + } } } } @@ -2522,19 +2619,19 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, * means that only JIT_Vn registers can be trusted on * arrival of jmpr. 
*/ + jit_regset_set_ui(mask, 0); for (regno = 0; regno < _jitc->reglen; regno++) { spec = jit_class(_rvs[regno].spec); - if (jit_regset_tstbit(mask, regno) && - (spec & (jit_class_gpr|jit_class_fpr)) && - !(spec & jit_class_sav)) - jit_regset_clrbit(mask, regno); + if ((spec & (jit_class_gpr|jit_class_fpr)) && + (spec & jit_class_sav)) + jit_regset_setbit(mask, regno); } /* Assume non callee save registers are live due * to jump to unknown location. */ /* Treat all callee save as live. */ jit_regset_ior(live, live, mask); /* Treat anything else as dead. */ - jit_regset_set_ui(mask, 0); + return; } } break; @@ -2930,7 +3027,7 @@ _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) return (0); } -static void +static jit_bool_t _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) { jit_node_t *iter; @@ -2938,30 +3035,33 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) jit_word_t word; jit_int32_t spec; jit_int32_t regno; + jit_bool_t result; if (jump) { prev = node->u.n; if (prev->code == jit_code_epilog) - return; + return (0); assert(prev->code == jit_code_label); if ((prev->flag & jit_flag_head) || node->link || prev->link != node) /* multiple sources */ - return; + return (0); /* if there are sequential labels it will return below */ } else prev = node; + result = 0; word = node->w.w; regno = jit_regno(node->v.w); for (iter = prev->next; iter; prev = iter, iter = iter->next) { switch (iter->code) { case jit_code_label: case jit_code_prolog: case jit_code_epilog: - return; + return (result); case jit_code_movi: if (regno == jit_regno(iter->u.w)) { if (iter->flag || iter->v.w != word) - return; + return (result); + result = 1; del_node(prev, iter); iter = prev; } @@ -2969,28 +3069,28 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump) default: spec = jit_classify(iter->code); if (spec & jit_cc_a0_jmp) - return; + return (result); if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) == 
(jit_cc_a0_reg|jit_cc_a0_chg)) { if (spec & jit_cc_a0_rlh) { if (regno == jit_regno(iter->u.q.l) || regno == jit_regno(iter->u.q.h)) - return; + return (result); } else { if (regno == jit_regno(iter->u.w)) - return; + return (result); } } if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) == (jit_cc_a1_reg|jit_cc_a1_chg)) { if (regno == jit_regno(iter->v.w)) - return; + return (result); } if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) == (jit_cc_a2_reg|jit_cc_a2_chg)) { if (regno == jit_regno(iter->w.w)) - return; + return (result); } break; } @@ -3182,7 +3282,7 @@ _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno) * once to the same value, and is a common pattern of calls * to jit_pushargi and jit_pushargr */ -static void +static jit_bool_t _simplify(jit_state_t *_jit) { jit_node_t *prev; @@ -3190,7 +3290,9 @@ _simplify(jit_state_t *_jit) jit_node_t *next; jit_int32_t info; jit_int32_t regno; + jit_bool_t result; + result = 0; for (prev = NULL, node = _jitc->head; node; prev = node, node = next) { next = node->next; switch (node->code) { @@ -3213,6 +3315,7 @@ _simplify(jit_state_t *_jit) * already holding */ patch_register(node->link->next, node, jit_regno_patch|regno, regno); + result = 1; del_node(_jitc->spill[regno], node->link); del_node(prev, node); node = prev; @@ -3222,38 +3325,50 @@ _simplify(jit_state_t *_jit) case jit_code_movr: regno = jit_regno(node->u.w); if (simplify_movr(prev, node, - jit_kind_word, sizeof(jit_word_t))) + jit_kind_word, sizeof(jit_word_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movi: regno = jit_regno(node->u.w); if (simplify_movi(prev, node, - jit_kind_word, sizeof(jit_word_t))) + jit_kind_word, sizeof(jit_word_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movr_f: regno = jit_regno(node->u.w); if (simplify_movr(prev, node, - jit_kind_float32, sizeof(jit_float32_t))) + jit_kind_float32, sizeof(jit_float32_t))) { + result = 1; simplify_spill(node = 
prev, regno); + } break; case jit_code_movi_f: regno = jit_regno(node->u.w); if (simplify_movi(prev, node, - jit_kind_float32, sizeof(jit_float32_t))) + jit_kind_float32, sizeof(jit_float32_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movr_d: regno = jit_regno(node->u.w); if (simplify_movr(prev, node, - jit_kind_float64, sizeof(jit_float64_t))) + jit_kind_float64, sizeof(jit_float64_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_movi_d: regno = jit_regno(node->u.w); if (simplify_movi(prev, node, - jit_kind_float64, sizeof(jit_float64_t))) + jit_kind_float64, sizeof(jit_float64_t))) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_ldxi_c: case jit_code_ldxi_uc: case jit_code_ldxi_s: case jit_code_ldxi_us: @@ -3261,15 +3376,19 @@ _simplify(jit_state_t *_jit) case jit_code_ldxi_l: case jit_code_ldxi_f: case jit_code_ldxi_d: regno = jit_regno(node->u.w); - if (simplify_ldxi(prev, node)) + if (simplify_ldxi(prev, node)) { + result = 1; simplify_spill(node = prev, regno); + } break; case jit_code_stxi_c: case jit_code_stxi_s: case jit_code_stxi_i: case jit_code_stxi_l: case jit_code_stxi_f: case jit_code_stxi_d: regno = jit_regno(node->u.w); - if (simplify_stxi(prev, node)) + if (simplify_stxi(prev, node)) { + result = 1; simplify_spill(node = prev, regno); + } break; default: info = jit_classify(node->code); @@ -3298,13 +3417,29 @@ _simplify(jit_state_t *_jit) ++_jitc->gen[regno]; } if (info & jit_cc_a2_chg) { - regno = jit_regno(node->w.w); - _jitc->values[regno].kind = 0; - ++_jitc->gen[regno]; +#if 0 + /* Assume registers are not changed */ + if (info & jit_cc_a2_rlh) { + regno = jit_regno(node->w.q.l); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + regno = jit_regno(node->w.q.h); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; + } + else { +#endif + regno = jit_regno(node->w.w); + _jitc->values[regno].kind = 0; + ++_jitc->gen[regno]; +#if 0 + } 
+#endif } break; } } + return (result); } static jit_int32_t @@ -3505,8 +3640,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link, } if ((value & jit_cc_a1_reg) && node->v.w == regno) node->v.w = patch; - if ((value & jit_cc_a2_reg) && node->w.w == regno) - node->w.w = patch; + if (value & jit_cc_a2_reg) { + if (value & jit_cc_a2_rlh) { + if (node->w.q.l == regno) + node->w.q.l = patch; + if (node->w.q.h == regno) + node->w.q.h = patch; + } + else { + if (node->w.w == regno) + node->w.w = patch; + } + } } } diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 9e55aa9f..3f25cc2e 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 7545b5a7995be9e7b70e786a6b534004ea26c999 - parent = 2fba93f2853c57240f031adb4712acbd2a066d34 + commit = e1222761836bb478dcec86cf441dcc5514565137 + parent = eeff1b0a26e4c7f7449640c0bf999e506f538694 method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/README.md b/deps/lightrec/README.md index 40ecc8f6..ab2c13b5 100644 --- a/deps/lightrec/README.md +++ b/deps/lightrec/README.md @@ -50,4 +50,6 @@ Lightrec has been ported to the following emulators: * [__pcsx4all__ (my own fork)](https://github.com/pcercuei/pcsx4all) -* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/) \ No newline at end of file +* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/) + +[![Star History Chart](https://api.star-history.com/svg?repos=pcercuei/lightrec&type=Date)](https://star-history.com/#pcercuei/lightrec&Date) diff --git a/deps/lightrec/blockcache.c b/deps/lightrec/blockcache.c index 70c5aebe..bb58cdb1 100644 --- a/deps/lightrec/blockcache.c +++ b/deps/lightrec/blockcache.c @@ -7,6 +7,8 @@ #include "debug.h" #include "lightrec-private.h" #include "memmanager.h" +#include "reaper.h" +#include "recompiler.h" #include #include @@ -117,6 +119,7 @@ static void 
lightrec_free_blocks(struct blockcache *cache, struct block *block, *next; bool outdated = all; unsigned int i; + u8 old_flags; for (i = 0; i < LUT_SIZE; i++) { for (block = cache->lut[i]; block; block = next) { @@ -130,7 +133,15 @@ static void lightrec_free_blocks(struct blockcache *cache, lightrec_block_is_outdated(state, block); } - if (outdated) { + if (!outdated) + continue; + + old_flags = block_set_flags(block, BLOCK_IS_DEAD); + + if (!(old_flags & BLOCK_IS_DEAD)) { + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + pr_debug("Freeing outdated block at PC 0x%08x\n", block->pc); remove_from_code_lut(cache, block); lightrec_unregister_block(cache, block); @@ -187,11 +198,27 @@ u32 lightrec_calculate_block_hash(const struct block *block) return hash; } +static void lightrec_reset_lut_offset(struct lightrec_state *state, void *d) +{ + u32 pc = (u32)(uintptr_t) d; + struct block *block; + void *addr; + + block = lightrec_find_block(state->block_cache, pc); + if (!block) + return; + + if (block_has_flag(block, BLOCK_IS_DEAD)) + return; + + addr = block->function ?: state->get_next_block; + lut_write(state, lut_offset(pc), addr); +} + bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block) { u32 offset = lut_offset(block->pc); bool outdated; - void *addr; if (lut_read(state, offset)) return false; @@ -200,12 +227,24 @@ bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *bloc if (likely(!outdated)) { /* The block was marked as outdated, but the content is still * the same */ - if (block->function) - addr = block->function; - else - addr = state->get_next_block; - lut_write(state, offset, addr); + if (ENABLE_THREADED_COMPILER) { + /* + * When compiling a block that covers ours, the threaded + * compiler will set the LUT entries of the various + * entry points. Therefore we cannot write the LUT here, + * as we would risk overwriting the new entry points. 
+ * Leave it to the reaper to re-install the LUT entries. + */ + + lightrec_reaper_add(state->reaper, + lightrec_reset_lut_offset, + (void *)(uintptr_t) block->pc); + } else if (block->function) { + lut_write(state, offset, block->function); + } else { + lut_write(state, offset, state->get_next_block); + } } return outdated; diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index 5c94324f..1a217bc2 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -11,7 +11,7 @@ #include "lightrec-private.h" #include "regcache.h" -static const char *std_opcodes[] = { +static const char * const std_opcodes[] = { [OP_J] = "j ", [OP_JAL] = "jal ", [OP_BEQ] = "beq ", @@ -42,7 +42,7 @@ static const char *std_opcodes[] = { [OP_SWC2] = "swc2 ", }; -static const char *special_opcodes[] = { +static const char * const special_opcodes[] = { [OP_SPECIAL_SLL] = "sll ", [OP_SPECIAL_SRL] = "srl ", [OP_SPECIAL_SRA] = "sra ", @@ -73,14 +73,14 @@ static const char *special_opcodes[] = { [OP_SPECIAL_SLTU] = "sltu ", }; -static const char *regimm_opcodes[] = { +static const char * const regimm_opcodes[] = { [OP_REGIMM_BLTZ] = "bltz ", [OP_REGIMM_BGEZ] = "bgez ", [OP_REGIMM_BLTZAL] = "bltzal ", [OP_REGIMM_BGEZAL] = "bgezal ", }; -static const char *cp0_opcodes[] = { +static const char * const cp0_opcodes[] = { [OP_CP0_MFC0] = "mfc0 ", [OP_CP0_CFC0] = "cfc0 ", [OP_CP0_MTC0] = "mtc0 ", @@ -88,38 +88,68 @@ static const char *cp0_opcodes[] = { [OP_CP0_RFE] = "rfe", }; -static const char *cp2_opcodes[] = { +static const char * const cp2_basic_opcodes[] = { [OP_CP2_BASIC_MFC2] = "mfc2 ", [OP_CP2_BASIC_CFC2] = "cfc2 ", [OP_CP2_BASIC_MTC2] = "mtc2 ", [OP_CP2_BASIC_CTC2] = "ctc2 ", }; -static const char *opcode_flags[] = { +static const char * const cp2_opcodes[] = { + [OP_CP2_RTPS] = "rtps ", + [OP_CP2_NCLIP] = "nclip ", + [OP_CP2_OP] = "op ", + [OP_CP2_DPCS] = "dpcs ", + [OP_CP2_INTPL] = "intpl ", + [OP_CP2_MVMVA] = "mvmva ", + [OP_CP2_NCDS] = "ncds ", + 
[OP_CP2_CDP] = "cdp ", + [OP_CP2_NCDT] = "ncdt ", + [OP_CP2_NCCS] = "nccs ", + [OP_CP2_CC] = "cc ", + [OP_CP2_NCS] = "ncs ", + [OP_CP2_NCT] = "nct ", + [OP_CP2_SQR] = "sqr ", + [OP_CP2_DCPL] = "dcpl ", + [OP_CP2_DPCT] = "dpct ", + [OP_CP2_AVSZ3] = "avsz3 ", + [OP_CP2_AVSZ4] = "avsz4 ", + [OP_CP2_RTPT] = "rtpt ", + [OP_CP2_GPF] = "gpf ", + [OP_CP2_GPL] = "gpl ", + [OP_CP2_NCCT] = "ncct ", +}; + +static const char * const mult2_opcodes[] = { + "mult2 ", "multu2 ", +}; + +static const char * const opcode_flags[] = { "switched branch/DS", "sync point", }; -static const char *opcode_io_flags[] = { +static const char * const opcode_io_flags[] = { "self-modifying code", "no invalidation", "no mask", }; -static const char *opcode_io_modes[] = { +static const char * const opcode_io_modes[] = { "Memory access", "I/O access", "RAM access", "BIOS access", "Scratchpad access", + "Mapped I/O access" }; -static const char *opcode_branch_flags[] = { +static const char * const opcode_branch_flags[] = { "emulate branch", "local branch", }; -static const char *opcode_multdiv_flags[] = { +static const char * const opcode_multdiv_flags[] = { "No LO", "No HI", "No div check", @@ -145,7 +175,7 @@ static const char * const reg_op_token[3] = { }; static int print_flags(char *buf, size_t len, const struct opcode *op, - const char **array, size_t array_size, + const char * const *array, size_t array_size, bool is_io) { const char *flag_name, *io_mode_name; @@ -223,7 +253,7 @@ static int print_flags(char *buf, size_t len, const struct opcode *op, } static int print_op_special(union code c, char *buf, size_t len, - const char ***flags_ptr, size_t *nb_flags) + const char * const **flags_ptr, size_t *nb_flags) { switch (c.r.op) { case OP_SPECIAL_SLL: @@ -294,17 +324,14 @@ static int print_op_special(union code c, char *buf, size_t len, static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp) { if (cp == 2) { - switch (c.i.rs) { - case OP_CP0_MFC0: - case OP_CP0_CFC0: - case 
OP_CP0_MTC0: - case OP_CP0_CTC0: + switch (c.r.op) { + case OP_CP2_BASIC: return snprintf(buf, len, "%s%s,%u", - cp2_opcodes[c.i.rs], + cp2_basic_opcodes[c.i.rs], lightrec_reg_name(c.i.rt), c.r.rd); default: - return snprintf(buf, len, "cp2 (0x%08x)", c.opcode); + return snprintf(buf, len, "%s", cp2_opcodes[c.r.op]); } } else { switch (c.i.rs) { @@ -325,7 +352,7 @@ static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp) } static int print_op(union code c, u32 pc, char *buf, size_t len, - const char ***flags_ptr, size_t *nb_flags, + const char * const **flags_ptr, size_t *nb_flags, bool *is_io) { if (c.opcode == 0) @@ -429,6 +456,15 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, return snprintf(buf, len, "exts %s,%s", lightrec_reg_name(c.i.rt), lightrec_reg_name(c.i.rs)); + case OP_META_MULT2: + case OP_META_MULTU2: + *flags_ptr = opcode_multdiv_flags; + *nb_flags = ARRAY_SIZE(opcode_multdiv_flags); + return snprintf(buf, len, "%s%s,%s,%s,%u", + mult2_opcodes[c.i.op == OP_META_MULTU2], + lightrec_reg_name(get_mult_div_hi(c)), + lightrec_reg_name(get_mult_div_lo(c)), + lightrec_reg_name(c.r.rs), c.r.op); default: return snprintf(buf, len, "unknown (0x%08x)", c.opcode); } @@ -437,7 +473,7 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr) { const struct opcode *op; - const char **flags_ptr; + const char * const *flags_ptr; size_t nb_flags, count, count2; char buf[256], buf2[256], buf3[256]; unsigned int i; diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h index a4fc9f50..e4685a9d 100644 --- a/deps/lightrec/disassembler.h +++ b/deps/lightrec/disassembler.h @@ -34,6 +34,7 @@ #define LIGHTREC_IO_RAM 0x3 #define LIGHTREC_IO_BIOS 0x4 #define LIGHTREC_IO_SCRATCH 0x5 +#define LIGHTREC_IO_DIRECT_HW 0x6 #define LIGHTREC_IO_MASK LIGHTREC_IO_MODE(0x7) #define LIGHTREC_FLAGS_GET_IO_MODE(x) \ (((x) & LIGHTREC_IO_MASK) >> 
LIGHTREC_IO_MODE_LSB) @@ -110,6 +111,9 @@ enum standard_opcodes { OP_META_EXTC = 0x17, OP_META_EXTS = 0x18, + + OP_META_MULT2 = 0x19, + OP_META_MULTU2 = 0x1a, }; enum special_opcodes { @@ -160,6 +164,28 @@ enum cp0_opcodes { enum cp2_opcodes { OP_CP2_BASIC = 0x00, + OP_CP2_RTPS = 0x01, + OP_CP2_NCLIP = 0x06, + OP_CP2_OP = 0x0c, + OP_CP2_DPCS = 0x10, + OP_CP2_INTPL = 0x11, + OP_CP2_MVMVA = 0x12, + OP_CP2_NCDS = 0x13, + OP_CP2_CDP = 0x14, + OP_CP2_NCDT = 0x16, + OP_CP2_NCCS = 0x1b, + OP_CP2_CC = 0x1c, + OP_CP2_NCS = 0x1e, + OP_CP2_NCT = 0x20, + OP_CP2_SQR = 0x28, + OP_CP2_DCPL = 0x29, + OP_CP2_DPCT = 0x2a, + OP_CP2_AVSZ3 = 0x2d, + OP_CP2_AVSZ4 = 0x2e, + OP_CP2_RTPT = 0x30, + OP_CP2_GPF = 0x3d, + OP_CP2_GPL = 0x3e, + OP_CP2_NCCT = 0x3f, }; enum cp2_basic_opcodes { @@ -233,6 +259,11 @@ struct opcode { u32 flags; }; +struct opcode_list { + u16 nb_ops; + struct opcode ops[]; +}; + void lightrec_print_disassembly(const struct block *block, const u32 *code); static inline _Bool op_flag_no_ds(u32 flags) diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 3af04326..cf32f7a4 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -29,6 +29,15 @@ static void unknown_opcode(struct lightrec_cstate *state, const struct block *bl block->pc + (offset << 2)); } +static void +lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit) +{ + /* Prevent jit_jmpi() from using our cycles register as a temporary */ + jit_live(LIGHTREC_REG_CYCLE); + + jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func); +} + static void lightrec_emit_end_of_block(struct lightrec_cstate *state, const struct block *block, u16 offset, s8 reg_new_pc, u32 imm, u8 ra_reg, @@ -39,7 +48,6 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, const struct opcode *op = &block->opcode_list[offset], *next = &block->opcode_list[offset + 1]; u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c); - u16 offset_after_eob; jit_note(__FILE__, 
__LINE__); @@ -76,11 +84,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, pr_debug("EOB: %u cycles\n", cycles); } - offset_after_eob = offset + 1 + - (has_delay_slot(op->c) && !op_flag_no_ds(op->flags)); - - if (offset_after_eob < block->nb_ops) - state->branches[state->nb_branches++] = jit_b(); + lightrec_jump_to_eob(state, _jit); } void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block, @@ -99,7 +103,7 @@ void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block, jit_movi(JIT_V0, block->pc + (offset << 2)); jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles); - state->branches[state->nb_branches++] = jit_b(); + lightrec_jump_to_eob(state, _jit); } static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -191,7 +195,7 @@ static void lightrec_do_early_unload(struct lightrec_cstate *state, } static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset, - jit_code_t code, u32 link, bool unconditional, bool bz) + jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz) { struct regcache *reg_cache = state->reg_cache; struct native_register *regs_backup; @@ -204,6 +208,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 bool is_forward = (s16)op->i.imm >= -1; int op_cycles = lightrec_cycles_of_opcode(op->c); u32 target_offset, cycles = state->cycles + op_cycles; + bool no_indirection = false; u32 next_pc; jit_note(__FILE__, __LINE__); @@ -221,6 +226,14 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 /* Unload dead registers before evaluating the branch */ if (OPT_EARLY_UNLOAD) lightrec_do_early_unload(state, block, offset); + + if (op_flag_local_branch(op->flags) && + (op_flag_no_ds(op->flags) || !next->opcode) && + is_forward && !lightrec_has_dirty_regs(reg_cache)) + no_indirection = true; + + if (no_indirection) + pr_debug("Using no 
indirection for branch at offset 0x%hx\n", offset << 2); } if (cycles) @@ -228,7 +241,8 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 if (!unconditional) { /* Generate the branch opcode */ - addr = jit_new_node_pww(code, NULL, rs, rt); + if (!no_indirection) + addr = jit_new_node_pww(code, NULL, rs, rt); lightrec_free_regs(reg_cache); regs_backup = lightrec_regcache_enter_branch(reg_cache); @@ -257,7 +271,10 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 state->nb_local_branches++]; branch->target = target_offset; - if (is_forward) + + if (no_indirection) + branch->branch = jit_new_node_pww(code2, NULL, rs, rt); + else if (is_forward) branch->branch = jit_b(); else branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0); @@ -270,7 +287,9 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 } if (!unconditional) { - jit_patch(addr); + if (!no_indirection) + jit_patch(addr); + lightrec_regcache_leave_branch(reg_cache, regs_backup); if (bz && link) { @@ -294,9 +313,9 @@ static void rec_BNE(struct lightrec_cstate *state, _jit_name(block->_jit, __func__); if (c.i.rt == 0) - rec_b(state, block, offset, jit_code_beqi, 0, false, true); + rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true); else - rec_b(state, block, offset, jit_code_beqr, 0, false, false); + rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false); } static void rec_BEQ(struct lightrec_cstate *state, @@ -307,9 +326,9 @@ static void rec_BEQ(struct lightrec_cstate *state, _jit_name(block->_jit, __func__); if (c.i.rt == 0) - rec_b(state, block, offset, jit_code_bnei, 0, c.i.rs == 0, true); + rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true); else - rec_b(state, block, offset, jit_code_bner, 0, c.i.rs == c.i.rt, false); + rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false); } static void rec_BLEZ(struct 
lightrec_cstate *state, @@ -318,28 +337,28 @@ static void rec_BLEZ(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true); + rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true); } static void rec_BGTZ(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_blei, 0, false, true); + rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true); } static void rec_regimm_BLTZ(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bgei, 0, false, true); + rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true); } static void rec_regimm_BLTZAL(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_bgei, + rec_b(state, block, offset, jit_code_bgei, jit_code_blti, get_branch_pc(block, offset, 2), false, true); } @@ -349,7 +368,7 @@ static void rec_regimm_BGEZ(struct lightrec_cstate *state, union code c = block->opcode_list[offset].c; _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true); + rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true); } static void rec_regimm_BGEZAL(struct lightrec_cstate *state, @@ -357,7 +376,7 @@ static void rec_regimm_BGEZAL(struct lightrec_cstate *state, { const struct opcode *op = &block->opcode_list[offset]; _jit_name(block->_jit, __func__); - rec_b(state, block, offset, jit_code_blti, + rec_b(state, block, offset, jit_code_blti, jit_code_bgei, get_branch_pc(block, offset, 2), !op->i.rs, true); } @@ -662,8 +681,8 @@ static void rec_special_or_nor(struct lightrec_cstate *state, /* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & 
!Z(rs)) */ if ((REG_EXT & flags_rs & flags_rt) || - (flags_rt & (REG_EXT | REG_ZEXT) == REG_EXT) || - (flags_rs & (REG_EXT | REG_ZEXT) == REG_EXT)) + ((flags_rt & (REG_EXT | REG_ZEXT)) == REG_EXT) || + ((flags_rs & (REG_EXT | REG_ZEXT)) == REG_EXT)) flags_rd |= REG_EXT; lightrec_set_reg_out_flags(reg_cache, rd, flags_rd); @@ -1034,22 +1053,41 @@ static void rec_special_MTLO(struct lightrec_cstate *state, rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs); } -static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block *block, - u32 arg, bool with_arg, enum c_wrappers wrapper) +static void call_to_c_wrapper(struct lightrec_cstate *state, + const struct block *block, u32 arg, + enum c_wrappers wrapper) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - u8 tmp; + s8 tmp, tmp2; - tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - jit_ldxi(tmp, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, wrappers_eps[wrapper])); + /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */ + tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1); - if (with_arg) { - jit_prepare(); - jit_pushargi(arg); + tmp = lightrec_get_reg_with_value(reg_cache, + (intptr_t) state->state->wrappers_eps[wrapper]); + if (tmp < 0) { + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + jit_ldxi(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrappers_eps[wrapper])); + + lightrec_temp_set_value(reg_cache, tmp, + (intptr_t) state->state->wrappers_eps[wrapper]); } + lightrec_free_reg(reg_cache, tmp2); + +#ifdef __mips__ + /* On MIPS, register t9 is always used as the target register for JALR. + * Therefore if it does not contain the target address we must + * invalidate it. 
*/ + if (tmp != _T9) + lightrec_unload_reg(reg_cache, _jit, _T9); +#endif + + jit_prepare(); + jit_pushargi(arg); + lightrec_regcache_mark_live(reg_cache, _jit); jit_callr(tmp); @@ -1078,11 +1116,11 @@ static void rec_io(struct lightrec_cstate *state, lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); if (is_tagged) { - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW); } else { lut_entry = lightrec_get_lut_entry(block); call_to_c_wrapper(state, block, (lut_entry << 16) | offset, - true, C_WRAPPER_RW_GENERIC); + C_WRAPPER_RW_GENERIC); } } @@ -1111,8 +1149,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate, bool add_imm = c.i.imm && ((!state->mirrors_mapped && !no_mask) || (invalidate && ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); - bool need_tmp = !no_mask || addr_offset || add_imm; - bool need_tmp2 = addr_offset || invalidate; + bool need_tmp = !no_mask || addr_offset || add_imm || invalidate; rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); @@ -1134,10 +1171,8 @@ static void rec_store_memory(struct lightrec_cstate *cstate, addr_reg = tmp; } - if (need_tmp2) - tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); - if (addr_offset) { + tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); jit_addi(tmp2, addr_reg, addr_offset); addr_reg2 = tmp2; } else { @@ -1161,20 +1196,20 @@ static void rec_store_memory(struct lightrec_cstate *cstate, tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0); if (c.i.op != OP_SW) { - jit_andi(tmp2, addr_reg, ~3); - addr_reg = tmp2; + jit_andi(tmp, addr_reg, ~3); + addr_reg = tmp; } if (!lut_is_32bit(state)) { - jit_lshi(tmp2, addr_reg, 1); - addr_reg = tmp2; + jit_lshi(tmp, addr_reg, 1); + addr_reg = tmp; } if (addr_reg == rs && c.i.rs == 0) { addr_reg = LIGHTREC_REG_STATE; } else { - jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE); - addr_reg = tmp2; + jit_addr(tmp, 
addr_reg, LIGHTREC_REG_STATE); + addr_reg = tmp; } if (lut_is_32bit(state)) @@ -1185,7 +1220,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate, lightrec_free_reg(reg_cache, tmp3); } - if (need_tmp2) + if (addr_offset) lightrec_free_reg(reg_cache, tmp2); if (need_tmp) lightrec_free_reg(reg_cache, tmp); @@ -1217,6 +1252,17 @@ static void rec_store_scratch(struct lightrec_cstate *cstate, 0x1fffffff, false); } +static void rec_store_io(struct lightrec_cstate *cstate, + const struct block *block, u16 offset, + jit_code_t code, jit_code_t swap_code) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + return rec_store_memory(cstate, block, offset, code, swap_code, + cstate->state->offset_io, + 0x1fffffff, false); +} + static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, const struct block *block, u16 offset, jit_code_t code, @@ -1232,7 +1278,6 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, jit_note(__FILE__, __LINE__); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); - rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); if (state->offset_ram || state->offset_scratch) @@ -1272,6 +1317,8 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, lightrec_free_reg(reg_cache, tmp2); } + rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0); + if (is_big_endian() && swap_code && c.i.rt) { tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); @@ -1390,6 +1437,9 @@ static void rec_store(struct lightrec_cstate *state, rec_store_direct(state, block, offset, code, swap_code); } break; + case LIGHTREC_IO_DIRECT_HW: + rec_store_io(state, block, offset, code, swap_code); + break; default: rec_io(state, block, offset, true, false); break; @@ -1527,6 +1577,16 @@ static void rec_load_scratch(struct lightrec_cstate *cstate, cstate->state->offset_scratch, 0x1fffffff); } +static void rec_load_io(struct lightrec_cstate *cstate, + const struct block 
*block, u16 offset, + jit_code_t code, jit_code_t swap_code, bool is_unsigned) +{ + _jit_note(block->_jit, __FILE__, __LINE__); + + rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned, + cstate->state->offset_io, 0x1fffffff); +} + static void rec_load_direct(struct lightrec_cstate *cstate, const struct block *block, u16 offset, jit_code_t code, jit_code_t swap_code, @@ -1652,6 +1712,9 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block, case LIGHTREC_IO_SCRATCH: rec_load_scratch(state, block, offset, code, swap_code, is_unsigned); break; + case LIGHTREC_IO_DIRECT_HW: + rec_load_io(state, block, offset, code, swap_code, is_unsigned); + break; case LIGHTREC_IO_DIRECT: rec_load_direct(state, block, offset, code, swap_code, is_unsigned); break; @@ -1675,8 +1738,10 @@ static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u1 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset) { + jit_code_t code = is_big_endian() ? 
jit_code_ldxi_us : jit_code_ldxi_s; + _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false); + rec_load(state, block, offset, code, jit_code_bswapr_us, false); } static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -1699,8 +1764,15 @@ static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u1 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset) { + jit_code_t code; + + if (is_big_endian() && __WORDSIZE == 64) + code = jit_code_ldxi_ui; + else + code = jit_code_ldxi_i; + _jit_name(block->_jit, __func__); - rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false); + rec_load(state, block, offset, code, jit_code_bswapr_ui, false); } static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -1710,14 +1782,22 @@ static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u } static void rec_break_syscall(struct lightrec_cstate *state, - const struct block *block, u16 offset, bool is_break) + const struct block *block, u16 offset, + u32 exit_code) { + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + u8 tmp; + _jit_note(block->_jit, __FILE__, __LINE__); - if (is_break) - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK); - else - call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL); + tmp = lightrec_alloc_reg_temp(reg_cache, _jit); + + jit_movi(tmp, exit_code); + jit_stxi_i(offsetof(struct lightrec_state, exit_flags), + LIGHTREC_REG_STATE, tmp); + + lightrec_free_reg(reg_cache, tmp); /* TODO: the return address should be "pc - 4" if we're a delay slot */ lightrec_emit_end_of_block(state, block, offset, -1, @@ -1729,14 +1809,14 @@ static void rec_special_SYSCALL(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_break_syscall(state, 
block, offset, false); + rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL); } static void rec_special_BREAK(struct lightrec_cstate *state, const struct block *block, u16 offset) { _jit_name(block->_jit, __func__); - rec_break_syscall(state, block, offset, true); + rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK); } static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset) @@ -1749,7 +1829,7 @@ static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u1 lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC); if (c.i.op == OP_CP0 && !op_flag_no_ds(block->opcode_list[offset].flags) && @@ -2209,7 +2289,7 @@ static void rec_CP(struct lightrec_cstate *state, jit_name(__func__); jit_note(__FILE__, __LINE__); - call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP); + call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP); } static void rec_meta_MOV(struct lightrec_cstate *state, @@ -2260,6 +2340,59 @@ static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state, lightrec_free_reg(reg_cache, rt); } +static void rec_meta_MULT2(struct lightrec_cstate *state, + const struct block *block, + u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 reg_lo = get_mult_div_lo(c); + u8 reg_hi = get_mult_div_hi(c); + u32 flags = block->opcode_list[offset].flags; + bool is_signed = c.i.op == OP_META_MULT2; + u8 rs, lo, hi, rflags = 0, hiflags = 0; + + if (!op_flag_no_hi(flags) && c.r.op < 32) { + rflags = is_signed ? REG_EXT : REG_ZEXT; + hiflags = is_signed ? 
REG_EXT : (REG_EXT | REG_ZEXT); + } + + _jit_name(block->_jit, __func__); + jit_note(__FILE__, __LINE__); + + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags); + + if (!op_flag_no_lo(flags)) { + lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0); + + if (c.r.op < 32) + jit_lshi(lo, rs, c.r.op); + else + jit_movi(lo, 0); + + lightrec_free_reg(reg_cache, lo); + } + + if (!op_flag_no_hi(flags)) { + hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, hiflags); + + if (c.r.op >= 32) + jit_lshi(hi, rs, c.r.op - 32); + else if (is_signed) + jit_rshi(hi, rs, 32 - c.r.op); + else + jit_rshi_u(hi, rs, 32 - c.r.op); + + lightrec_free_reg(reg_cache, hi); + } + + lightrec_free_reg(reg_cache, rs); + + _jit_name(block->_jit, __func__); + jit_note(__FILE__, __LINE__); +} + static const lightrec_rec_func_t rec_standard[64] = { SET_DEFAULT_ELM(rec_standard, unknown_opcode), [OP_SPECIAL] = rec_SPECIAL, @@ -2298,6 +2431,8 @@ static const lightrec_rec_func_t rec_standard[64] = { [OP_META_MOV] = rec_meta_MOV, [OP_META_EXTC] = rec_meta_EXTC_EXTS, [OP_META_EXTS] = rec_meta_EXTC_EXTS, + [OP_META_MULT2] = rec_meta_MULT2, + [OP_META_MULTU2] = rec_meta_MULT2, }; static const lightrec_rec_func_t rec_special[64] = { diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index 57986d81..43bea83f 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -985,6 +985,33 @@ static u32 int_META_EXTS(struct interpreter *inter) return jump_next(inter); } +static u32 int_META_MULT2(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + union code c = inter->op->c; + u32 rs = reg_cache[c.r.rs]; + u8 reg_lo = get_mult_div_lo(c); + u8 reg_hi = get_mult_div_hi(c); + + if (!op_flag_no_lo(inter->op->flags)) { + if (c.r.op < 32) + reg_cache[reg_lo] = rs << c.r.op; + else + reg_cache[reg_lo] = 0; + } + + if (!op_flag_no_hi(inter->op->flags)) { + if (c.r.op >= 32) + reg_cache[reg_hi] = rs << (c.r.op - 32); + else if (c.i.op == 
OP_META_MULT2) + reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op); + else + reg_cache[reg_hi] = rs >> (32 - c.r.op); + } + + return jump_next(inter); +} + static const lightrec_int_func_t int_standard[64] = { SET_DEFAULT_ELM(int_standard, int_unimplemented), [OP_SPECIAL] = int_SPECIAL, @@ -1023,6 +1050,8 @@ static const lightrec_int_func_t int_standard[64] = { [OP_META_MOV] = int_META_MOV, [OP_META_EXTC] = int_META_EXTC, [OP_META_EXTS] = int_META_EXTS, + [OP_META_MULT2] = int_META_MULT2, + [OP_META_MULTU2] = int_META_MULT2, }; static const lightrec_int_func_t int_special[64] = { @@ -1155,5 +1184,7 @@ u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u3 pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); + return 0; } diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index 4eedef27..1b120db1 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -10,6 +10,7 @@ #include "lightrec-config.h" #include "disassembler.h" #include "lightrec.h" +#include "regcache.h" #if ENABLE_THREADED_COMPILER #include @@ -45,12 +46,24 @@ #define fallthrough do {} while (0) /* fall-through */ +#define container_of(ptr, type, member) \ + ((type *)((void *)(ptr) - offsetof(type, member))) + +#ifdef _MSC_BUILD +# define popcount32(x) __popcnt(x) +# define ffs32(x) (31 - __lzcnt(x)) +#else +# define popcount32(x) __builtin_popcount(x) +# define ffs32(x) (__builtin_ffs(x) - 1) +#endif + /* Flags for (struct block *)->flags */ #define BLOCK_NEVER_COMPILE BIT(0) #define BLOCK_SHOULD_RECOMPILE BIT(1) #define BLOCK_FULLY_TAGGED BIT(2) #define BLOCK_IS_DEAD BIT(3) #define BLOCK_IS_MEMSET BIT(4) +#define BLOCK_NO_OPCODE_LIST BIT(5) #define RAM_SIZE 0x200000 #define BIOS_SIZE 0x80000 @@ -90,9 +103,10 @@ struct block { u32 precompile_date; unsigned int code_size; u16 nb_ops; - u8 flags; #if ENABLE_THREADED_COMPILER - atomic_flag op_list_freed; + 
_Atomic u8 flags; +#else + u8 flags; #endif }; @@ -111,18 +125,14 @@ enum c_wrappers { C_WRAPPER_RW_GENERIC, C_WRAPPER_MTC, C_WRAPPER_CP, - C_WRAPPER_SYSCALL, - C_WRAPPER_BREAK, C_WRAPPERS_COUNT, }; struct lightrec_cstate { struct lightrec_state *state; - struct jit_node *branches[512]; struct lightrec_branch local_branches[512]; struct lightrec_branch_target targets[512]; - unsigned int nb_branches; unsigned int nb_local_branches; unsigned int nb_targets; unsigned int cycles; @@ -132,6 +142,7 @@ struct lightrec_cstate { struct lightrec_state { struct lightrec_registers regs; + uintptr_t wrapper_regs[NUM_TEMPS]; u32 next_pc; u32 current_cycle; u32 target_cycle; @@ -152,7 +163,7 @@ struct lightrec_state { unsigned int nb_precompile; unsigned int nb_maps; const struct lightrec_mem_map *maps; - uintptr_t offset_ram, offset_bios, offset_scratch; + uintptr_t offset_ram, offset_bios, offset_scratch, offset_io; _Bool with_32bit_lut; _Bool mirrors_mapped; _Bool invalidate_from_dma_only; @@ -262,7 +273,8 @@ void lightrec_free_cstate(struct lightrec_cstate *cstate); union code lightrec_read_opcode(struct lightrec_state *state, u32 pc); int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block); -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block); +void lightrec_free_opcode_list(struct lightrec_state *state, + struct opcode *list); unsigned int lightrec_cycles_of_opcode(union code code); @@ -281,4 +293,41 @@ static inline s16 s16_max(s16 a, s16 b) return a > b ? 
a : b; } +static inline _Bool block_has_flag(struct block *block, u8 flag) +{ +#if ENABLE_THREADED_COMPILER + return atomic_load_explicit(&block->flags, memory_order_relaxed) & flag; +#else + return block->flags & flag; +#endif +} + +static inline u8 block_set_flags(struct block *block, u8 mask) +{ +#if ENABLE_THREADED_COMPILER + return atomic_fetch_or_explicit(&block->flags, mask, + memory_order_relaxed); +#else + u8 flags = block->flags; + + block->flags |= mask; + + return flags; +#endif +} + +static inline u8 block_clear_flags(struct block *block, u8 mask) +{ +#if ENABLE_THREADED_COMPILER + return atomic_fetch_and_explicit(&block->flags, ~mask, + memory_order_relaxed); +#else + u8 flags = block->flags; + + block->flags &= ~mask; + + return flags; +#endif +} + #endif /* __LIGHTREC_PRIVATE_H__ */ diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index ffa40f09..497cc685 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -342,12 +342,14 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) struct opcode *op; bool was_tagged; u16 offset = (u16)arg; + u16 old_flags; block = lightrec_find_block_from_lut(state->block_cache, arg >> 16, state->next_pc); if (unlikely(!block)) { pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n", state->next_pc, offset); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); return; } @@ -357,11 +359,14 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) lightrec_rw_helper(state, op->c, &op->flags, block); if (!was_tagged) { - pr_debug("Opcode of block at PC 0x%08x has been tagged - flag " - "for recompilation\n", block->pc); + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); - block->flags |= BLOCK_SHOULD_RECOMPILE; - lut_write(state, lut_offset(block->pc), NULL); + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at PC 0x%08x has been tagged" + " - flag for recompilation\n", block->pc); + + 
lut_write(state, lut_offset(block->pc), NULL); + } } } @@ -418,12 +423,27 @@ static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg) u32 lightrec_mfc(struct lightrec_state *state, union code op) { + u32 val; + if (op.i.op == OP_CP0) return state->regs.cp0[op.r.rd]; else if (op.r.rs == OP_CP2_BASIC_MFC2) return lightrec_mfc2(state, op.r.rd); - else - return state->regs.cp2c[op.r.rd]; + + val = state->regs.cp2c[op.r.rd]; + + switch (op.r.rd) { + case 4: + case 12: + case 20: + case 26: + case 27: + case 29: + case 30: + return (u32)(s16)val; + default: + return val; + } } static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data) @@ -586,31 +606,26 @@ static void lightrec_cp_cb(struct lightrec_state *state, u32 arg) lightrec_cp(state, (union code) arg); } -static void lightrec_syscall_cb(struct lightrec_state *state) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL); -} - -static void lightrec_break_cb(struct lightrec_state *state) -{ - lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK); -} - static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc) { struct block *block = lightrec_find_block(state->block_cache, pc); + u8 old_flags; if (block && lightrec_block_is_outdated(state, block)) { pr_debug("Block at PC 0x%08x is outdated!\n", block->pc); - /* Make sure the recompiler isn't processing the block we'll - * destroy */ - if (ENABLE_THREADED_COMPILER) - lightrec_recompiler_remove(state->rec, block); + old_flags = block_set_flags(block, BLOCK_IS_DEAD); + if (!(old_flags & BLOCK_IS_DEAD)) { + /* Make sure the recompiler isn't processing the block + * we'll destroy */ + if (ENABLE_THREADED_COMPILER) + lightrec_recompiler_remove(state->rec, block); + + lightrec_unregister_block(state->block_cache, block); + remove_from_code_lut(state->block_cache, block); + lightrec_free_block(state, block); + } - lightrec_unregister_block(state->block_cache, block); - remove_from_code_lut(state->block_cache, block); - 
lightrec_free_block(state, block); block = NULL; } @@ -645,19 +660,18 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (unlikely(!block)) break; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) { + if (OPT_REPLACE_MEMSET && + block_has_flag(block, BLOCK_IS_MEMSET)) { func = state->memset_func; break; } - should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE && - !(block->flags & BLOCK_IS_DEAD); + should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { pr_debug("Block at PC 0x%08x should recompile\n", pc); - lightrec_unregister(MEM_FOR_CODE, block->code_size); - if (ENABLE_THREADED_COMPILER) { lightrec_recompiler_add(state->rec, block); } else { @@ -677,12 +691,12 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) if (likely(func)) break; - if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) { + if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) { pc = lightrec_emulate_block(state, block, pc); } else if (!ENABLE_THREADED_COMPILER) { /* Block wasn't compiled yet - run the interpreter */ - if (block->flags & BLOCK_FULLY_TAGGED) + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) pr_debug("Block fully tagged, skipping first pass\n"); else if (ENABLE_FIRST_PASS && likely(!should_recompile)) pc = lightrec_emulate_block(state, block, pc); @@ -693,6 +707,15 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) state->exit_flags = LIGHTREC_EXIT_NOMEM; return NULL; } + } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) { + /* + * If the block is dead but has never been compiled, + * then its function pointer is NULL and we cannot + * execute the block. In that case, reap all the dead + * blocks now, and in the next loop we will create a + * new block. 
+ */ + lightrec_reaper_reap(state->reaper); } else { lightrec_recompiler_add(state->rec, block); } @@ -706,16 +729,6 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) return func; } -static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta, - void (*f)(struct lightrec_state *, u32), u32 arg) -{ - state->current_cycle = state->target_cycle - cycles_delta; - - (*f)(state, arg); - - return state->target_cycle - state->current_cycle; -} - static void * lightrec_alloc_code(struct lightrec_state *state, size_t size) { void *code; @@ -821,9 +834,8 @@ static struct block * generate_wrapper(struct lightrec_state *state) struct block *block; jit_state_t *_jit; unsigned int i; - int stack_ptr; - jit_node_t *to_tramp, *to_fn_epilog; jit_node_t *addr[C_WRAPPERS_COUNT - 1]; + jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -840,67 +852,69 @@ static struct block * generate_wrapper(struct lightrec_state *state) jit_prolog(); jit_tramp(256); - /* Add entry points; separate them by opcodes that increment - * LIGHTREC_REG_STATE (since we cannot touch other registers). - * The difference will then tell us which C function to call. 
*/ + /* Add entry points */ for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { - jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8); + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[i])); + to_end[i - 1] = jit_b(); addr[i - 1] = jit_indirect(); } + jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, c_wrappers[0])); + + for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) + jit_patch(to_end[i]); + jit_epilog(); jit_prolog(); - stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS); - /* Save all temporaries on stack */ - for (i = 0; i < NUM_TEMPS; i++) - jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i)); + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]), + LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP)); + } + } - jit_getarg(JIT_R1, jit_arg()); + jit_getarg(JIT_R2, jit_arg()); - /* Jump to the trampoline */ - to_tramp = jit_jmpi(); + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_R2); - /* The trampoline will jump back here */ - to_fn_epilog = jit_label(); + jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); - /* Restore temporaries from stack */ - for (i = 0; i < NUM_TEMPS; i++) - jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t)); + /* state->current_cycle = state->target_cycle - delta; */ + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE); - jit_ret(); - jit_epilog(); + /* Call the wrapper function */ + jit_finishr(JIT_R1); - /* Trampoline entry point. - * The sole purpose of the trampoline is to cheese Lightning not to - * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we - * do want to return to the caller with this register modified. 
*/ - jit_prolog(); - jit_tramp(256); - jit_patch(to_tramp); - - /* Retrieve the wrapper function */ - jit_ldxi(JIT_R0, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, c_wrappers)); - - /* Restore LIGHTREC_REG_STATE to its correct value */ - jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state); + /* delta = state->target_cycle - state->current_cycle */; + jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE); - jit_prepare(); - jit_pushargr(LIGHTREC_REG_STATE); - jit_pushargr(LIGHTREC_REG_CYCLE); - jit_pushargr(JIT_R0); - jit_pushargr(JIT_R1); - jit_finishi(c_function_wrapper); - jit_retval_i(LIGHTREC_REG_CYCLE); + /* Restore temporaries from stack */ + for (i = 0; i < NUM_TEMPS; i++) { + if (i + FIRST_TEMP != 1) { + jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, wrapper_regs[i])); + } + } - jit_patch_at(jit_jmpi(), to_fn_epilog); + jit_ret(); jit_epilog(); block->_jit = _jit; block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; block->function = lightrec_emit_code(state, block, _jit, @@ -974,12 +988,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prolog(); jit_frame(256); - jit_getarg(JIT_R0, jit_arg()); + jit_getarg(JIT_V1, jit_arg()); jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg()); /* Force all callee-saved registers to be pushed on the stack */ for (i = 0; i < NUM_REGS; i++) - jit_movr(JIT_V(i), JIT_V(i)); + jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG)); /* Pass lightrec_state structure to blocks, using the last callee-saved * register that Lightning provides */ @@ -988,13 +1002,15 @@ static struct block * generate_dispatcher(struct lightrec_state *state) loop = jit_label(); /* Call the block's code */ - jit_jmpr(JIT_R0); + 
jit_jmpr(JIT_V1); if (OPT_REPLACE_MEMSET) { /* Blocks will jump here when they need to call * lightrec_memset() */ addr3 = jit_indirect(); + jit_movr(JIT_V1, LIGHTREC_REG_CYCLE); + jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_finishi(lightrec_memset); @@ -1002,8 +1018,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.gpr[31])); - jit_retval(JIT_R0); - jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0); + jit_retval(LIGHTREC_REG_CYCLE); + jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); } /* The block will jump here, with the number of cycles remaining in @@ -1018,25 +1034,30 @@ static struct block * generate_dispatcher(struct lightrec_state *state) to_end = jit_blei(LIGHTREC_REG_CYCLE, 0); /* Convert next PC to KUNSEG and avoid mirrors */ - jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); - jit_rshi_u(JIT_R1, JIT_R0, 28); + jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1)); + jit_rshi_u(JIT_R1, JIT_V1, 28); jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1); jit_addi(JIT_R2, JIT_R2, RAM_SIZE); - jit_movnr(JIT_R0, JIT_R2, JIT_R1); + jit_movnr(JIT_V1, JIT_R2, JIT_R1); /* If possible, use the code LUT */ if (!lut_is_32bit(state)) - jit_lshi(JIT_R0, JIT_R0, 1); - jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE); + jit_lshi(JIT_V1, JIT_V1, 1); + jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE); offset = offsetof(struct lightrec_state, code_lut); if (lut_is_32bit(state)) - jit_ldxi_ui(JIT_R0, JIT_R0, offset); + jit_ldxi_ui(JIT_V1, JIT_V1, offset); else - jit_ldxi(JIT_R0, JIT_R0, offset); + jit_ldxi(JIT_V1, JIT_V1, offset); /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); + + /* The code LUT will be set to this address when the block at the target + * PC has been preprocessed but not yet compiled by the threaded + * recompiler */ + addr = jit_indirect(); /* Slow path: call C function 
get_next_block_func() */ @@ -1044,22 +1065,22 @@ static struct block * generate_dispatcher(struct lightrec_state *state) /* We may call the interpreter - update state->current_cycle */ jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE); + jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE); jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_R1); + LIGHTREC_REG_STATE, JIT_V1); } - /* The code LUT will be set to this address when the block at the target - * PC has been preprocessed but not yet compiled by the threaded - * recompiler */ - addr = jit_indirect(); - - /* Get the next block */ jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); jit_pushargr(JIT_V0); + + /* Save the cycles register if needed */ + if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES)) + jit_movr(JIT_V0, LIGHTREC_REG_CYCLE); + + /* Get the next block */ jit_finishi(&get_next_block_func); - jit_retval(JIT_R0); + jit_retval(JIT_V1); if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and @@ -1069,10 +1090,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, target_cycle)); jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + } else { + jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } /* If we get non-NULL, loop */ - jit_patch_at(jit_bnei(JIT_R0, 0), loop); + jit_patch_at(jit_bnei(JIT_V1, 0), loop); /* When exiting, the recompiled code will jump to that address */ jit_note(__FILE__, __LINE__); @@ -1083,7 +1106,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) block->_jit = _jit; block->opcode_list = NULL; - block->flags = 0; + block->flags = BLOCK_NO_OPCODE_LIST; block->nb_ops = 0; block->function = lightrec_emit_code(state, block, _jit, @@ -1127,11 +1150,13 @@ unsigned int lightrec_cycles_of_opcode(union code 
code) return 2; } -void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block) +void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) { + struct opcode_list *list = container_of(ops, struct opcode_list, ops); + lightrec_free(state, MEM_FOR_IR, - sizeof(*block->opcode_list) * block->nb_ops, - block->opcode_list); + sizeof(*list) + list->nb_ops * sizeof(struct opcode), + list); } static unsigned int lightrec_get_mips_block_len(const u32 *src) @@ -1153,25 +1178,28 @@ static unsigned int lightrec_get_mips_block_len(const u32 *src) static struct opcode * lightrec_disassemble(struct lightrec_state *state, const u32 *src, unsigned int *len) { - struct opcode *list; + struct opcode_list *list; unsigned int i, length; length = lightrec_get_mips_block_len(src); - list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length); + list = lightrec_malloc(state, MEM_FOR_IR, + sizeof(*list) + sizeof(struct opcode) * length); if (!list) { pr_err("Unable to allocate memory\n"); return NULL; } + list->nb_ops = (u16) length; + for (i = 0; i < length; i++) { - list[i].opcode = LE32TOH(src[i]); - list[i].flags = 0; + list->ops[i].opcode = LE32TOH(src[i]); + list->ops[i].flags = 0; } *len = length * sizeof(u32); - return list; + return list->ops; } static struct block * lightrec_precompile_block(struct lightrec_state *state, @@ -1179,11 +1207,12 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, { struct opcode *list; struct block *block; - void *host; + void *host, *addr; const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc)); const u32 *code = (u32 *) host; unsigned int length; bool fully_tagged; + u8 block_flags = 0; if (!map) return NULL; @@ -1209,9 +1238,6 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, block->flags = 0; block->code_size = 0; block->precompile_date = state->current_cycle; -#if ENABLE_THREADED_COMPILER - block->op_list_freed = 
(atomic_flag)ATOMIC_FLAG_INIT; -#endif block->nb_ops = length / sizeof(u32); lightrec_optimize(state, block); @@ -1230,17 +1256,23 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, /* If the first opcode is an 'impossible' branch, never compile the * block */ if (should_emulate(block->opcode_list)) - block->flags |= BLOCK_NEVER_COMPILE; + block_flags |= BLOCK_NEVER_COMPILE; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_flags |= BLOCK_FULLY_TAGGED; - if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) - lut_write(state, lut_offset(pc), state->memset_func); + if (block_flags) + block_set_flags(block, block_flags); block->hash = lightrec_calculate_block_hash(block); + if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET)) + addr = state->memset_func; + else + addr = state->get_next_block; + lut_write(state, lut_offset(pc), addr); + pr_debug("Recompile count: %u\n", state->nb_precompile++); return block; @@ -1310,24 +1342,31 @@ static void lightrec_reap_function(struct lightrec_state *state, void *data) lightrec_free_function(state, data); } +static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data) +{ + lightrec_free_opcode_list(state, data); +} + int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block) { struct lightrec_state *state = cstate->state; struct lightrec_branch_target *target; - bool op_list_freed = false, fully_tagged = false; + bool fully_tagged = false; struct block *block2; struct opcode *elm; jit_state_t *_jit, *oldjit; jit_node_t *start_of_block; bool skip_next = false; void *old_fn, *new_fn; + size_t old_code_size; unsigned int i, j; + u8 old_flags; u32 offset; fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) - block->flags |= BLOCK_FULLY_TAGGED; + block_set_flags(block, BLOCK_FULLY_TAGGED); _jit = jit_new_state(); if (!_jit) @@ -1335,11 +1374,11 @@ int 
lightrec_compile_block(struct lightrec_cstate *cstate, oldjit = block->_jit; old_fn = block->function; + old_code_size = block->code_size; block->_jit = _jit; lightrec_regcache_reset(cstate->reg_cache); cstate->cycles = 0; - cstate->nb_branches = 0; cstate->nb_local_branches = 0; cstate->nb_targets = 0; @@ -1377,9 +1416,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, cstate->cycles += lightrec_cycles_of_opcode(elm->c); } - for (i = 0; i < cstate->nb_branches; i++) - jit_patch(cstate->branches[i]); - for (i = 0; i < cstate->nb_local_branches; i++) { struct lightrec_branch *branch = &cstate->local_branches[i]; @@ -1403,7 +1439,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_err("Unable to find branch target\n"); } - jit_patch_abs(jit_jmpi(), state->eob_wrapper_func); jit_ret(); jit_epilog(); @@ -1412,22 +1447,24 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, if (!ENABLE_THREADED_COMPILER) pr_err("Unable to compile block!\n"); block->_jit = oldjit; + jit_clear_state(); _jit_destroy_state(_jit); return -ENOMEM; } + /* Pause the reaper, because lightrec_reset_lut_offset() may try to set + * the old block->function pointer to the code LUT. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block->function = new_fn; - block->flags &= ~BLOCK_SHOULD_RECOMPILE; + block_clear_flags(block, BLOCK_SHOULD_RECOMPILE); /* Add compiled function to the LUT */ lut_write(state, lut_offset(block->pc), block->function); - if (ENABLE_THREADED_COMPILER) { - /* Since we might try to reap the same block multiple times, - * we need the reaper to wait until everything has been - * submitted, so that the duplicate entries can be dropped. 
*/ - lightrec_reaper_pause(state->reaper); - } + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_continue(state->reaper); /* Detect old blocks that have been covered by the new one */ for (i = 0; i < cstate->nb_targets; i++) { @@ -1437,6 +1474,13 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, continue; offset = block->pc + target->offset * sizeof(u32); + + /* Pause the reaper while we search for the block until we set + * the BLOCK_IS_DEAD flag, otherwise the block may be removed + * under our feet. */ + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_pause(state->reaper); + block2 = lightrec_find_block(state->block_cache, offset); if (block2) { /* No need to check if block2 is compilable - it must @@ -1444,12 +1488,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, /* Set the "block dead" flag to prevent the dynarec from * recompiling this block */ - block2->flags |= BLOCK_IS_DEAD; + old_flags = block_set_flags(block2, BLOCK_IS_DEAD); + } + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_continue(state->reaper); /* If block2 was pending for compilation, cancel it. * If it's being compiled right now, wait until it * finishes. */ - if (ENABLE_THREADED_COMPILER) + if (block2) lightrec_recompiler_remove(state->rec, block2); } @@ -1464,20 +1512,17 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, "0x%08x\n", block2->pc, block->pc); /* Finally, reap the block. 
*/ - if (ENABLE_THREADED_COMPILER) { + if (!ENABLE_THREADED_COMPILER) { + lightrec_unregister_block(state->block_cache, block2); + lightrec_free_block(state, block2); + } else if (!(old_flags & BLOCK_IS_DEAD)) { lightrec_reaper_add(state->reaper, lightrec_reap_block, block2); - } else { - lightrec_unregister_block(state->block_cache, block2); - lightrec_free_block(state, block2); } } } - if (ENABLE_THREADED_COMPILER) - lightrec_reaper_continue(state->reaper); - if (ENABLE_DISASSEMBLER) { pr_debug("Compiling block at PC: 0x%08x\n", block->pc); jit_disassemble(); @@ -1485,15 +1530,20 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, jit_clear_state(); -#if ENABLE_THREADED_COMPILER if (fully_tagged) - op_list_freed = atomic_flag_test_and_set(&block->op_list_freed); -#endif - if (fully_tagged && !op_list_freed) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + + if (ENABLE_THREADED_COMPILER) { + lightrec_reaper_add(state->reaper, + lightrec_reap_opcode_list, + block->opcode_list); + } else { + lightrec_free_opcode_list(state, block->opcode_list); + } } if (oldjit) { @@ -1509,6 +1559,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, _jit_destroy_state(oldjit); lightrec_free_function(state, old_fn); } + + lightrec_unregister(MEM_FOR_CODE, old_code_size); } return 0; @@ -1561,20 +1613,24 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle) return state->next_pc; } -u32 lightrec_execute_one(struct lightrec_state *state, u32 pc) +u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc, + u32 target_cycle) { - return lightrec_execute(state, pc, state->current_cycle); -} - -u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) -{ - struct block *block = 
lightrec_get_block(state, pc); - if (!block) - return 0; + struct block *block; state->exit_flags = LIGHTREC_EXIT_NORMAL; + state->target_cycle = target_cycle; + + do { + block = lightrec_get_block(state, pc); + if (!block) + break; + + pc = lightrec_emulate_block(state, block, pc); - pc = lightrec_emulate_block(state, block, pc); + if (ENABLE_THREADED_COMPILER) + lightrec_reaper_reap(state->reaper); + } while (state->current_cycle < state->target_cycle); if (LOG_LEVEL >= INFO_L) lightrec_print_info(state); @@ -1584,9 +1640,13 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc) void lightrec_free_block(struct lightrec_state *state, struct block *block) { + u8 old_flags; + lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32)); - if (block->opcode_list) - lightrec_free_opcode_list(state, block); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); + + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + lightrec_free_opcode_list(state, block->opcode_list); if (block->_jit) _jit_destroy_state(block->_jit); if (block->function) { @@ -1705,8 +1765,6 @@ struct lightrec_state * lightrec_init(char *argv0, state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb; state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb; state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb; - state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb; - state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb; map = &state->maps[PSX_MAP_BIOS]; state->offset_bios = (uintptr_t)map->address - map->pc; @@ -1714,6 +1772,9 @@ struct lightrec_state * lightrec_init(char *argv0, map = &state->maps[PSX_MAP_SCRATCH_PAD]; state->offset_scratch = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_HW_REGISTERS]; + state->offset_io = (uintptr_t)map->address - map->pc; + map = &state->maps[PSX_MAP_KERNEL_USER_RAM]; state->offset_ram = (uintptr_t)map->address - map->pc; @@ -1725,6 +1786,7 @@ struct lightrec_state * lightrec_init(char *argv0, if (state->offset_bios == 0 
&& state->offset_scratch == 0 && state->offset_ram == 0 && + state->offset_io == 0 && state->mirrors_mapped) { pr_info("Memory map is perfect. Emitted code will be best.\n"); } else { diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index 3ea8e656..9613da36 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -87,6 +87,7 @@ struct lightrec_mem_map { struct lightrec_ops { void (*cop2_op)(struct lightrec_state *state, u32 op); void (*enable_ram)(struct lightrec_state *state, _Bool enable); + _Bool (*hw_direct)(u32 kaddr, _Bool is_write, u8 size); }; struct lightrec_registers { @@ -105,8 +106,8 @@ __api void lightrec_destroy(struct lightrec_state *state); __api u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle); -__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc); -__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc); +__api u32 lightrec_run_interpreter(struct lightrec_state *state, + u32 pc, u32 target_cycle); __api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len); __api void lightrec_invalidate_all(struct lightrec_state *state); diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 8da84eee..2eba60ed 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -119,11 +119,31 @@ static u64 opcode_read_mask(union code op) } } -static u64 opcode_write_mask(union code op) +static u64 mult_div_write_mask(union code op) { u64 flags; + if (!OPT_FLAG_MULT_DIV) + return BIT(REG_LO) | BIT(REG_HI); + + if (op.r.rd) + flags = BIT(op.r.rd); + else + flags = BIT(REG_LO); + if (op.r.imm) + flags |= BIT(op.r.imm); + else + flags |= BIT(REG_HI); + + return flags; +} + +static u64 opcode_write_mask(union code op) +{ switch (op.i.op) { + case OP_META_MULT2: + case OP_META_MULTU2: + return mult_div_write_mask(op); case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JR: @@ -134,18 +154,7 @@ static u64 opcode_write_mask(union code 
op) case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: - if (!OPT_FLAG_MULT_DIV) - return BIT(REG_LO) | BIT(REG_HI); - - if (op.r.rd) - flags = BIT(op.r.rd); - else - flags = BIT(REG_LO); - if (op.r.imm) - flags |= BIT(op.r.imm); - else - flags |= BIT(REG_HI); - return flags; + return mult_div_write_mask(op); case OP_SPECIAL_MTHI: return BIT(REG_HI); case OP_SPECIAL_MTLO: @@ -361,6 +370,22 @@ static bool opcode_is_store(union code op) } } +static u8 opcode_get_io_size(union code op) +{ + switch (op.i.op) { + case OP_LB: + case OP_LBU: + case OP_SB: + return 8; + case OP_LH: + case OP_LHU: + case OP_SH: + return 16; + default: + return 32; + } +} + bool opcode_is_io(union code op) { return opcode_is_load(op) || opcode_is_store(op); @@ -601,10 +626,48 @@ static u32 lightrec_propagate_consts(const struct opcode *op, known &= ~BIT(c.r.rd); } break; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + if (OPT_FLAG_MULT_DIV && c.r.rd) + known &= ~BIT(c.r.rd); + if (OPT_FLAG_MULT_DIV && c.r.imm) + known &= ~BIT(c.r.imm); + break; default: break; } break; + case OP_META_MULT2: + case OP_META_MULTU2: + if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) { + if (c.r.rd) { + known |= BIT(c.r.rd); + + if (c.r.op < 32) + v[c.r.rd] = v[c.r.rs] << c.r.op; + else + v[c.r.rd] = 0; + } + + if (c.r.imm) { + known |= BIT(c.r.imm); + + if (c.r.op >= 32) + v[c.r.imm] = v[c.r.rs] << (c.r.op - 32); + else if (c.i.op == OP_META_MULT2) + v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op); + else + v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op); + } + } else { + if (OPT_FLAG_MULT_DIV && c.r.rd) + known &= ~BIT(c.r.rd); + if (OPT_FLAG_MULT_DIV && c.r.imm) + known &= ~BIT(c.r.imm); + } + break; case OP_REGIMM: break; case OP_ADDI: @@ -911,7 +974,8 @@ static int lightrec_transform_branches(struct lightrec_state *state, op->i.imm = offset; } - default: /* fall-through */ + fallthrough; + default: break; } } @@ -919,6 +983,11 @@ static int 
lightrec_transform_branches(struct lightrec_state *state, return 0; } +static inline bool is_power_of_two(u32 value) +{ + return popcount32(value) == 1; +} + static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { struct opcode *list = block->opcode_list; @@ -926,6 +995,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; + u8 tmp; for (i = 0; i < block->nb_ops; i++) { prev = op; @@ -1000,6 +1070,28 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl op->r.rs = op->r.rt; } break; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + if ((known & BIT(op->r.rs)) && + is_power_of_two(values[op->r.rs])) { + tmp = op->c.i.rs; + op->c.i.rs = op->c.i.rt; + op->c.i.rt = tmp; + } else if (!(known & BIT(op->r.rt)) || + !is_power_of_two(values[op->r.rt])) { + break; + } + + pr_debug("Multiply by power-of-two: %u\n", + values[op->r.rt]); + + if (op->r.op == OP_SPECIAL_MULT) + op->i.op = OP_META_MULT2; + else + op->i.op = OP_META_MULTU2; + + op->r.op = ffs32(values[op->r.rt]); + break; case OP_SPECIAL_OR: case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: @@ -1028,6 +1120,64 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl return 0; } +static bool lightrec_can_switch_delay_slot(union code op, union code next_op) +{ + switch (op.i.op) { + case OP_SPECIAL: + switch (op.r.op) { + case OP_SPECIAL_JALR: + if (opcode_reads_register(next_op, op.r.rd) || + opcode_writes_register(next_op, op.r.rd)) + return false; + fallthrough; + case OP_SPECIAL_JR: + if (opcode_writes_register(next_op, op.r.rs)) + return false; + fallthrough; + default: + break; + } + fallthrough; + case OP_J: + break; + case OP_JAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + return false;; + + break; + case OP_BEQ: + case OP_BNE: + if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) + return 
false; + fallthrough; + case OP_BLEZ: + case OP_BGTZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + return false; + break; + case OP_REGIMM: + switch (op.r.rt) { + case OP_REGIMM_BLTZAL: + case OP_REGIMM_BGEZAL: + if (opcode_reads_register(next_op, 31) || + opcode_writes_register(next_op, 31)) + return false; + fallthrough; + case OP_REGIMM_BLTZ: + case OP_REGIMM_BGEZ: + if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) + return false; + break; + } + fallthrough; + default: + break; + } + + return true; +} + static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block) { struct opcode *list, *next = &block->opcode_list[0]; @@ -1050,71 +1200,20 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc !op_flag_no_ds(block->opcode_list[i - 1].flags)) continue; - if (op_flag_sync(list->flags) || op_flag_sync(next->flags)) + if (op_flag_sync(next->flags)) continue; - switch (list->i.op) { - case OP_SPECIAL: - switch (op.r.op) { - case OP_SPECIAL_JALR: - if (opcode_reads_register(next_op, op.r.rd) || - opcode_writes_register(next_op, op.r.rd)) - continue; - fallthrough; - case OP_SPECIAL_JR: - if (opcode_writes_register(next_op, op.r.rs)) - continue; - fallthrough; - default: - break; - } - fallthrough; - case OP_J: - break; - case OP_JAL: - if (opcode_reads_register(next_op, 31) || - opcode_writes_register(next_op, 31)) - continue; - else - break; - case OP_BEQ: - case OP_BNE: - if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) - continue; - fallthrough; - case OP_BLEZ: - case OP_BGTZ: - if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) - continue; - break; - case OP_REGIMM: - switch (op.r.rt) { - case OP_REGIMM_BLTZAL: - case OP_REGIMM_BGEZAL: - if (opcode_reads_register(next_op, 31) || - opcode_writes_register(next_op, 31)) - continue; - fallthrough; - case OP_REGIMM_BLTZ: - case OP_REGIMM_BGEZ: - if (op.i.rs && - opcode_writes_register(next_op, op.i.rs)) - continue; - break; - 
} - fallthrough; - default: - break; - } + if (!lightrec_can_switch_delay_slot(list->c, next_op)) + continue; pr_debug("Swap branch and delay slot opcodes " "at offsets 0x%x / 0x%x\n", i << 2, (i + 1) << 2); - flags = next->flags; + flags = next->flags | (list->flags & LIGHTREC_SYNC); list->c = next_op; next->c = op; - next->flags = list->flags | LIGHTREC_NO_DS; + next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC; list->flags = flags | LIGHTREC_NO_DS; } @@ -1123,7 +1222,7 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size) { - struct opcode *list; + struct opcode_list *list, *old_list; if (new_size >= block->nb_ops) { pr_err("Invalid shrink size (%u vs %u)\n", @@ -1131,19 +1230,20 @@ static int shrink_opcode_list(struct lightrec_state *state, struct block *block, return -EINVAL; } - list = lightrec_malloc(state, MEM_FOR_IR, - sizeof(*list) * new_size); + sizeof(*list) + sizeof(struct opcode) * new_size); if (!list) { pr_err("Unable to allocate memory\n"); return -ENOMEM; } - memcpy(list, block->opcode_list, sizeof(*list) * new_size); + old_list = container_of(block->opcode_list, struct opcode_list, ops); + memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size); - lightrec_free_opcode_list(state, block); - block->opcode_list = list; + lightrec_free_opcode_list(state, block->opcode_list); + list->nb_ops = new_size; block->nb_ops = new_size; + block->opcode_list = list->ops; pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n", block->pc, new_size); @@ -1449,6 +1549,7 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) u32 values[32] = { 0 }; unsigned int i; u32 val, kunseg_val; + bool no_mask; for (i = 0; i < block->nb_ops; i++) { prev = list; @@ -1483,7 +1584,7 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) kunseg(values[list->i.rs]) < 
(kunseg(block->pc) + block->nb_ops * 4)) { pr_debug("Self-modifying block detected\n"); - block->flags |= BLOCK_NEVER_COMPILE; + block_set_flags(block, BLOCK_NEVER_COMPILE); list->flags |= LIGHTREC_SMC; } } @@ -1505,10 +1606,11 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) psx_map = lightrec_get_map_idx(state, kunseg_val); list->flags &= ~LIGHTREC_IO_MASK; + no_mask = val == kunseg_val; switch (psx_map) { case PSX_MAP_KERNEL_USER_RAM: - if (val == kunseg_val) + if (no_mask) list->flags |= LIGHTREC_NO_MASK; fallthrough; case PSX_MAP_MIRROR1: @@ -1516,19 +1618,36 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) case PSX_MAP_MIRROR3: pr_debug("Flaging opcode %u as RAM access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM); + if (no_mask && state->mirrors_mapped) + list->flags |= LIGHTREC_NO_MASK; break; case PSX_MAP_BIOS: pr_debug("Flaging opcode %u as BIOS access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS); + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; break; case PSX_MAP_SCRATCH_PAD: pr_debug("Flaging opcode %u as scratchpad access\n", i); list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH); + if (no_mask) + list->flags |= LIGHTREC_NO_MASK; /* Consider that we're never going to run code from * the scratchpad. */ list->flags |= LIGHTREC_NO_INVALIDATE; break; + case PSX_MAP_HW_REGISTERS: + if (state->ops.hw_direct && + state->ops.hw_direct(kunseg_val, + opcode_is_store(list->c), + opcode_get_io_size(list->c))) { + pr_debug("Flagging opcode %u as direct I/O access\n", + i); + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW); + break; + } + fallthrough; default: pr_debug("Flagging opcode %u as I/O access\n", i); @@ -1591,6 +1710,9 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, } return mflo ? 
REG_LO : REG_HI; + case OP_META_MULT2: + case OP_META_MULTU2: + return 0; case OP_SPECIAL: switch (op->r.op) { case OP_SPECIAL_MULT: @@ -1736,20 +1858,26 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * if (prev) known = lightrec_propagate_consts(list, prev, known, values); - if (list->i.op != OP_SPECIAL) - continue; - - switch (list->r.op) { - case OP_SPECIAL_DIV: - case OP_SPECIAL_DIVU: - /* If we are dividing by a non-zero constant, don't - * emit the div-by-zero check. */ - if (lightrec_always_skip_div_check() || - (known & BIT(list->c.r.rt) && values[list->c.r.rt])) - list->flags |= LIGHTREC_NO_DIV_CHECK; + switch (list->i.op) { + case OP_SPECIAL: + switch (list->r.op) { + case OP_SPECIAL_DIV: + case OP_SPECIAL_DIVU: + /* If we are dividing by a non-zero constant, don't + * emit the div-by-zero check. */ + if (lightrec_always_skip_div_check() || + ((known & BIT(list->c.r.rt)) && values[list->c.r.rt])) + list->flags |= LIGHTREC_NO_DIV_CHECK; + fallthrough; + case OP_SPECIAL_MULT: + case OP_SPECIAL_MULTU: + break; + default: + continue; + } fallthrough; - case OP_SPECIAL_MULT: - case OP_SPECIAL_MULTU: + case OP_META_MULT2: + case OP_META_MULTU2: break; default: continue; @@ -1929,7 +2057,8 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b if (i == ARRAY_SIZE(memset_code) - 1) { /* success! */ pr_debug("Block at PC 0x%x is a memset\n", block->pc); - block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE; + block_set_flags(block, + BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE); /* Return non-zero to skip other optimizers. 
*/ return 1; diff --git a/deps/lightrec/reaper.c b/deps/lightrec/reaper.c index 2e32cae5..777b9970 100644 --- a/deps/lightrec/reaper.c +++ b/deps/lightrec/reaper.c @@ -24,8 +24,10 @@ struct reaper_elm { struct reaper { struct lightrec_state *state; pthread_mutex_t mutex; + pthread_cond_t cond; struct slist_elm reap_list; + bool running; atomic_uint sem; }; @@ -41,22 +43,36 @@ struct reaper *lightrec_reaper_init(struct lightrec_state *state) } reaper->state = state; + reaper->running = false; reaper->sem = 0; slist_init(&reaper->reap_list); ret = pthread_mutex_init(&reaper->mutex, NULL); if (ret) { pr_err("Cannot init mutex variable: %d\n", ret); - lightrec_free(reaper->state, MEM_FOR_LIGHTREC, - sizeof(*reaper), reaper); - return NULL; + goto err_free_reaper; + } + + ret = pthread_cond_init(&reaper->cond, NULL); + if (ret) { + pr_err("Cannot init cond variable: %d\n", ret); + goto err_destroy_mutex; } return reaper; + +err_destroy_mutex: + pthread_mutex_destroy(&reaper->mutex); +err_free_reaper: + lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper); + return NULL; } void lightrec_reaper_destroy(struct reaper *reaper) { + lightrec_reaper_reap(reaper); + + pthread_cond_destroy(&reaper->cond); pthread_mutex_destroy(&reaper->mutex); lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper); } @@ -108,6 +124,7 @@ void lightrec_reaper_reap(struct reaper *reaper) while (lightrec_reaper_can_reap(reaper) && !!(elm = slist_first(&reaper->reap_list))) { slist_remove(&reaper->reap_list, elm); + reaper->running = true; pthread_mutex_unlock(&reaper->mutex); reaper_elm = container_of(elm, struct reaper_elm, slist); @@ -118,6 +135,8 @@ void lightrec_reaper_reap(struct reaper *reaper) sizeof(*reaper_elm), reaper_elm); pthread_mutex_lock(&reaper->mutex); + reaper->running = false; + pthread_cond_broadcast(&reaper->cond); } pthread_mutex_unlock(&reaper->mutex); @@ -126,6 +145,11 @@ void lightrec_reaper_reap(struct reaper *reaper) void 
lightrec_reaper_pause(struct reaper *reaper) { atomic_fetch_add_explicit(&reaper->sem, 1, memory_order_relaxed); + + pthread_mutex_lock(&reaper->mutex); + while (reaper->running) + pthread_cond_wait(&reaper->cond, &reaper->mutex); + pthread_mutex_unlock(&reaper->mutex); } void lightrec_reaper_continue(struct reaper *reaper) diff --git a/deps/lightrec/recompiler.c b/deps/lightrec/recompiler.c index 7350adba..08a9235a 100644 --- a/deps/lightrec/recompiler.c +++ b/deps/lightrec/recompiler.c @@ -106,29 +106,20 @@ static bool lightrec_cancel_block_rec(struct recompiler *rec, static void lightrec_cancel_list(struct recompiler *rec) { struct block_rec *block_rec; - struct slist_elm *next; - - while (!!(next = lightrec_get_first_elm(&rec->slist))) { - block_rec = container_of(next, struct block_rec, slist); + struct slist_elm *elm, *head = &rec->slist; + for (elm = slist_first(head); elm; elm = slist_first(head)) { + block_rec = container_of(elm, struct block_rec, slist); lightrec_cancel_block_rec(rec, block_rec); } - - pthread_cond_broadcast(&rec->cond2); } static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d) { struct recompiler *rec = d; - pthread_mutex_lock(&rec->mutex); - - if (rec->must_flush) { - lightrec_remove_outdated_blocks(state->block_cache, NULL); - rec->must_flush = false; - } - - pthread_mutex_unlock(&rec->mutex); + lightrec_remove_outdated_blocks(state->block_cache, NULL); + rec->must_flush = false; } static void lightrec_compile_list(struct recompiler *rec, @@ -146,19 +137,23 @@ static void lightrec_compile_list(struct recompiler *rec, pthread_mutex_unlock(&rec->mutex); - if (likely(!(block->flags & BLOCK_IS_DEAD))) { + if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) { ret = lightrec_compile_block(thd->cstate, block); if (ret == -ENOMEM) { /* Code buffer is full. Request the reaper to * flush it. 
*/ pthread_mutex_lock(&rec->mutex); + block_rec->compiling = false; + pthread_cond_broadcast(&rec->cond2); + if (!rec->must_flush) { + rec->must_flush = true; + lightrec_cancel_list(rec); + lightrec_reaper_add(rec->state->reaper, lightrec_flush_code_buffer, rec); - lightrec_cancel_list(rec); - rec->must_flush = true; } return; } @@ -174,7 +169,7 @@ static void lightrec_compile_list(struct recompiler *rec, slist_remove(&rec->slist, next); lightrec_free(rec->state, MEM_FOR_LIGHTREC, sizeof(*block_rec), block_rec); - pthread_cond_signal(&rec->cond2); + pthread_cond_broadcast(&rec->cond2); } } @@ -333,7 +328,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* If the block is marked as dead, don't compile it, it will be removed * as soon as it's safe. */ - if (block->flags & BLOCK_IS_DEAD) + if (block_has_flag(block, BLOCK_IS_DEAD)) goto out_unlock; for (elm = slist_first(&rec->slist), prev = NULL; elm; @@ -345,7 +340,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) * it to the top of the list, unless the block is being * recompiled. */ if (prev && !block_rec->compiling && - !(block->flags & BLOCK_SHOULD_RECOMPILE)) { + !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) { slist_remove_next(prev); slist_append(&rec->slist, elm); } @@ -356,7 +351,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* By the time this function was called, the block has been recompiled * and ins't in the wait list anymore. Just return here. */ - if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE)) + if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) goto out_unlock; block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC, @@ -375,7 +370,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block) /* If the block is being recompiled, push it to the end of the queue; * otherwise push it to the front of the queue. 
*/ - if (block->flags & BLOCK_SHOULD_RECOMPILE) + if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) for (; elm->next; elm = elm->next); slist_append(elm, &block_rec->slist); @@ -419,31 +414,36 @@ out_unlock: void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, struct block *block, u32 *pc) { - bool freed; + u8 old_flags; /* There's no point in running the first pass if the block will never * be compiled. Let the main loop run the interpreter instead. */ - if (block->flags & BLOCK_NEVER_COMPILE) + if (block_has_flag(block, BLOCK_NEVER_COMPILE)) return NULL; + /* The block is marked as dead, and will be removed the next time the + * reaper is run. In the meantime, the old function can still be + * executed. */ + if (block_has_flag(block, BLOCK_IS_DEAD)) + return block->function; + /* If the block is already fully tagged, there is no point in running * the first pass. Request a recompilation of the block, and maybe the * interpreter will run the block in the meantime. */ - if (block->flags & BLOCK_FULLY_TAGGED) + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) lightrec_recompiler_add(state->rec, block); if (likely(block->function)) { - if (block->flags & BLOCK_FULLY_TAGGED) { - freed = atomic_flag_test_and_set(&block->op_list_freed); + if (block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - if (!freed) { + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { pr_debug("Block PC 0x%08x is fully tagged" " - free opcode list\n", block->pc); /* The block was already compiled but the opcode list * didn't get freed yet - do it now */ - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + lightrec_free_opcode_list(state, block->opcode_list); } } @@ -452,23 +452,25 @@ void * lightrec_recompiler_run_first_pass(struct lightrec_state *state, /* Mark the opcode list as freed, so that the threaded compiler won't * free it while we're using it in the interpreter. 
*/ - freed = atomic_flag_test_and_set(&block->op_list_freed); + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); /* Block wasn't compiled yet - run the interpreter */ *pc = lightrec_emulate_block(state, block, *pc); - if (!freed) - atomic_flag_clear(&block->op_list_freed); + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) + block_clear_flags(block, BLOCK_NO_OPCODE_LIST); /* The block got compiled while the interpreter was running. * We can free the opcode list now. */ - if (block->function && (block->flags & BLOCK_FULLY_TAGGED) && - !atomic_flag_test_and_set(&block->op_list_freed)) { - pr_debug("Block PC 0x%08x is fully tagged" - " - free opcode list\n", block->pc); + if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) { + old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST); - lightrec_free_opcode_list(state, block); - block->opcode_list = NULL; + if (!(old_flags & BLOCK_NO_OPCODE_LIST)) { + pr_debug("Block PC 0x%08x is fully tagged" + " - free opcode list\n", block->pc); + + lightrec_free_opcode_list(state, block->opcode_list); + } } return NULL; diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c index 791a9c5c..1f11d8a2 100644 --- a/deps/lightrec/regcache.c +++ b/deps/lightrec/regcache.c @@ -11,10 +11,22 @@ #include #include +enum reg_priority { + REG_IS_TEMP, + REG_IS_TEMP_VALUE, + REG_IS_ZERO, + REG_IS_LOADED, + REG_IS_DIRTY, + + REG_NB_PRIORITIES, +}; + struct native_register { - bool used, loaded, dirty, output, extend, extended, + bool used, output, extend, extended, zero_extend, zero_extended, locked; s8 emulated_register; + intptr_t value; + enum reg_priority prio; }; struct regcache { @@ -69,7 +81,11 @@ static inline u8 lightrec_reg_to_lightning(const struct regcache *cache, const struct native_register *nreg) { u8 offset = lightrec_reg_number(cache, nreg); - return offset < NUM_REGS ? 
JIT_V(offset) : JIT_R(offset - NUM_REGS); + + if (offset < NUM_REGS) + return JIT_V(FIRST_REG + offset); + else + return JIT_R(FIRST_TEMP + offset - NUM_REGS); } static inline struct native_register * lightning_reg_to_lightrec( @@ -78,14 +94,14 @@ static inline struct native_register * lightning_reg_to_lightrec( if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) || (JIT_V0 < JIT_R0 && reg < JIT_R0)) { if (JIT_V1 > JIT_V0) - return &cache->lightrec_regs[reg - JIT_V0]; + return &cache->lightrec_regs[reg - JIT_V(FIRST_REG)]; else - return &cache->lightrec_regs[JIT_V0 - reg]; + return &cache->lightrec_regs[JIT_V(FIRST_REG) - reg]; } else { if (JIT_R1 > JIT_R0) - return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0]; + return &cache->lightrec_regs[NUM_REGS + reg - JIT_R(FIRST_TEMP)]; else - return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg]; + return &cache->lightrec_regs[NUM_REGS + JIT_R(FIRST_TEMP) - reg]; } } @@ -119,6 +135,8 @@ void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags) static struct native_register * alloc_temp(struct regcache *cache) { + struct native_register *elm, *nreg = NULL; + enum reg_priority best = REG_NB_PRIORITIES; unsigned int i; /* We search the register list in reverse order. As temporaries are @@ -126,18 +144,18 @@ static struct native_register * alloc_temp(struct regcache *cache) * caller-saved registers, as they won't have to be saved back to * memory. 
*/ for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { - struct native_register *nreg = &cache->lightrec_regs[i - 1]; - if (!nreg->used && !nreg->loaded && !nreg->dirty) - return nreg; - } + elm = &cache->lightrec_regs[i - 1]; - for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) { - struct native_register *nreg = &cache->lightrec_regs[i - 1]; - if (!nreg->used) - return nreg; + if (!elm->used && elm->prio < best) { + nreg = elm; + best = elm->prio; + + if (best == REG_IS_TEMP) + break; + } } - return NULL; + return nreg; } static struct native_register * find_mapped_reg(struct regcache *cache, @@ -147,9 +165,9 @@ static struct native_register * find_mapped_reg(struct regcache *cache, for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { struct native_register *nreg = &cache->lightrec_regs[i]; - if ((!reg || nreg->loaded || nreg->dirty) && - nreg->emulated_register == reg && - (!out || !nreg->locked)) + if ((nreg->prio >= REG_IS_ZERO) && + nreg->emulated_register == reg && + (!out || !nreg->locked)) return nreg; } @@ -159,7 +177,8 @@ static struct native_register * find_mapped_reg(struct regcache *cache, static struct native_register * alloc_in_out(struct regcache *cache, u8 reg, bool out) { - struct native_register *nreg; + struct native_register *elm, *nreg = NULL; + enum reg_priority best = REG_NB_PRIORITIES; unsigned int i; /* Try to find if the register is already mapped somewhere */ @@ -167,48 +186,39 @@ static struct native_register * alloc_in_out(struct regcache *cache, if (nreg) return nreg; - /* Try to allocate a non-dirty, non-loaded register. - * Loaded registers may be re-used later, so it's better to avoid - * re-using one if possible. 
*/ - for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used && !nreg->dirty && !nreg->loaded) - return nreg; - } + nreg = NULL; - /* Try to allocate a non-dirty register */ for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used && !nreg->dirty) - return nreg; - } + elm = &cache->lightrec_regs[i]; - for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { - nreg = &cache->lightrec_regs[i]; - if (!nreg->used) - return nreg; + if (!elm->used && elm->prio < best) { + nreg = elm; + best = elm->prio; + + if (best == REG_IS_TEMP) + break; + } } - return NULL; + return nreg; } static void lightrec_discard_nreg(struct native_register *nreg) { nreg->extended = false; nreg->zero_extended = false; - nreg->loaded = false; nreg->output = false; - nreg->dirty = false; nreg->used = false; nreg->locked = false; nreg->emulated_register = -1; + nreg->prio = 0; } static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, struct native_register *nreg, u8 jit_reg) { /* If we get a dirty register, store back the old value */ - if (nreg->dirty) { + if (nreg->prio == REG_IS_DIRTY) { s16 offset = offsetof(struct lightrec_state, regs.gpr) + (nreg->emulated_register << 2); @@ -253,6 +263,7 @@ u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) lightrec_unload_nreg(cache, _jit, reg, jit_reg); reg->used = true; + reg->prio = REG_IS_LOADED; return jit_reg; } @@ -269,10 +280,38 @@ u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit) jit_reg = lightrec_reg_to_lightning(cache, nreg); lightrec_unload_nreg(cache, _jit, nreg, jit_reg); + nreg->prio = REG_IS_TEMP; nreg->used = true; return jit_reg; } +s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value) +{ + struct native_register *nreg; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { + nreg = &cache->lightrec_regs[i]; + + if (nreg->prio 
== REG_IS_TEMP_VALUE && nreg->value == value) { + nreg->used = true; + return lightrec_reg_to_lightning(cache, nreg); + } + } + + return -1; +} + +void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value) +{ + struct native_register *nreg; + + nreg = lightning_reg_to_lightrec(cache, jit_reg); + + nreg->prio = REG_IS_TEMP_VALUE; + nreg->value = value; +} + u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 flags) { @@ -303,6 +342,7 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit, nreg->emulated_register = reg; nreg->extend = flags & REG_EXT; nreg->zero_extend = flags & REG_ZEXT; + nreg->prio = reg ? REG_IS_LOADED : REG_IS_ZERO; return jit_reg; } @@ -333,7 +373,7 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, if (reg_changed) lightrec_unload_nreg(cache, _jit, nreg, jit_reg); - if (!nreg->loaded && !nreg->dirty && reg != 0) { + if (nreg->prio < REG_IS_LOADED && reg != 0) { s16 offset = offsetof(struct lightrec_state, regs.gpr) + (reg << 2); @@ -346,15 +386,15 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, else jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset); - nreg->loaded = true; + nreg->prio = REG_IS_LOADED; } /* Clear register r0 before use */ - if (reg == 0 && (!nreg->loaded || nreg->dirty)) { + if (reg == 0 && nreg->prio != REG_IS_ZERO) { jit_movi(jit_reg, 0); nreg->extended = true; nreg->zero_extended = true; - nreg->loaded = true; + nreg->prio = REG_IS_ZERO; } nreg->used = true; @@ -399,8 +439,8 @@ u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, nreg->extended = true; nreg->zero_extended = false; nreg->used = true; - nreg->loaded = true; nreg->emulated_register = reg; + nreg->prio = REG_IS_LOADED; return jit_reg; } @@ -409,7 +449,7 @@ static void free_reg(struct native_register *nreg) { /* Set output registers as dirty */ if (nreg->used && nreg->output && nreg->emulated_register > 0) - nreg->dirty = true; + nreg->prio 
= REG_IS_DIRTY; if (nreg->output) { nreg->extended = nreg->extend; nreg->zero_extended = nreg->zero_extend; @@ -434,13 +474,18 @@ void lightrec_free_regs(struct regcache *cache) static void clean_reg(jit_state_t *_jit, struct native_register *nreg, u8 jit_reg, bool clean) { - if (nreg->dirty) { + if (nreg->prio == REG_IS_DIRTY) { s16 offset = offsetof(struct lightrec_state, regs.gpr) + (nreg->emulated_register << 2); jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); - nreg->loaded |= nreg->dirty; - nreg->dirty ^= clean; + + if (clean) { + if (nreg->emulated_register == 0) + nreg->prio = REG_IS_ZERO; + else + nreg->prio = REG_IS_LOADED; + } } } @@ -448,11 +493,13 @@ static void clean_regs(struct regcache *cache, jit_state_t *_jit, bool clean) { unsigned int i; - for (i = 0; i < NUM_REGS; i++) - clean_reg(_jit, &cache->lightrec_regs[i], JIT_V(i), clean); + for (i = 0; i < NUM_REGS; i++) { + clean_reg(_jit, &cache->lightrec_regs[i], + JIT_V(FIRST_REG + i), clean); + } for (i = 0; i < NUM_TEMPS; i++) { clean_reg(_jit, &cache->lightrec_regs[i + NUM_REGS], - JIT_R(i), clean); + JIT_R(FIRST_TEMP + i), clean); } } @@ -466,6 +513,17 @@ void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit) clean_regs(cache, _jit, true); } +bool lightrec_has_dirty_regs(struct regcache *cache) +{ + unsigned int i; + + for (i = 0; i < NUM_REGS + NUM_TEMPS; i++) + if (cache->lightrec_regs[i].prio == REG_IS_DIRTY) + return true; + + return false; +} + void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) { struct native_register *reg; @@ -557,15 +615,18 @@ void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit) for (i = 0; i < NUM_REGS; i++) { nreg = &cache->lightrec_regs[i]; - if (nreg->used || nreg->loaded || nreg->dirty) - jit_live(JIT_V(i)); + if (nreg->used || nreg->prio > REG_IS_TEMP) + jit_live(JIT_V(FIRST_REG + i)); } #endif for (i = 0; i < NUM_TEMPS; i++) { nreg = &cache->lightrec_regs[NUM_REGS + i]; - if (nreg->used || 
nreg->loaded || nreg->dirty) - jit_live(JIT_R(i)); + if (nreg->used || nreg->prio > REG_IS_TEMP) + jit_live(JIT_R(FIRST_TEMP + i)); } + + jit_live(LIGHTREC_REG_STATE); + jit_live(LIGHTREC_REG_CYCLE); } diff --git a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index 5aa5050f..cffbf053 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -6,12 +6,25 @@ #ifndef __REGCACHE_H__ #define __REGCACHE_H__ -#include "lightrec-private.h" +#include "lightning-wrapper.h" -#define NUM_REGS (JIT_V_NUM - 2) -#define NUM_TEMPS (JIT_R_NUM) +#define NUM_REGS (JIT_V_NUM - 1) #define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) -#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2)) + +#if defined(__powerpc__) +# define NUM_TEMPS JIT_R_NUM +/* JIT_R0 is callee-saved on PowerPC, we have to use something else */ +# define LIGHTREC_REG_CYCLE _R10 +# define FIRST_TEMP 0 +#else +# define NUM_TEMPS (JIT_R_NUM - 1) +# define LIGHTREC_REG_CYCLE JIT_R0 +# define FIRST_TEMP 1 +#endif + +#include "lightrec-private.h" + +#define FIRST_REG 0 /* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. 
*/ #define REG_EXT BIT(0) /* register is sign-extended */ @@ -35,6 +48,9 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit, u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit, u8 reg, u8 jit_reg); +s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value); +void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value); + u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg); void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags); @@ -47,6 +63,7 @@ void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit); void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit); +_Bool lightrec_has_dirty_regs(struct regcache *cache); void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, u8 reg, _Bool unload); diff --git a/include/lightning/lightning.h b/include/lightning/lightning.h index e7afc5c2..23015a44 100644 --- a/include/lightning/lightning.h +++ b/include/lightning/lightning.h @@ -24,6 +24,7 @@ #include #include #include +#include #if defined(__hpux) && defined(__hppa__) # include @@ -913,6 +914,10 @@ typedef enum { #define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) #endif + jit_code_casr, jit_code_casi, +#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) +#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) + jit_code_last_code } jit_code_t; @@ -1081,6 +1086,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t, extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t, jit_int32_t, jit_int32_t, jit_word_t, jit_word_t); +#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h) +extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t, + jit_word_t, jit_word_t, + jit_int32_t, 
jit_int32_t); #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w) extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t, jit_word_t, jit_word_t, jit_float32_t); diff --git a/libpcsxcore/lightrec/plugin.c b/libpcsxcore/lightrec/plugin.c index 52d37f0f..13c11e34 100644 --- a/libpcsxcore/lightrec/plugin.c +++ b/libpcsxcore/lightrec/plugin.c @@ -53,8 +53,6 @@ static char *name = "retroarch.exe"; static bool use_lightrec_interpreter; static bool use_pcsx_interpreter; -static bool lightrec_debug; -static bool lightrec_very_debug; static bool booting; static u32 lightrec_begin_cycles; @@ -305,9 +303,86 @@ static void lightrec_enable_ram(struct lightrec_state *state, bool enable) memcpy(cache_buf, psxM, sizeof(cache_buf)); } +static bool lightrec_can_hw_direct(u32 kaddr, bool is_write, u8 size) +{ + switch (size) { + case 8: + switch (kaddr) { + case 0x1f801040: + case 0x1f801050: + case 0x1f801800: + case 0x1f801801: + case 0x1f801802: + case 0x1f801803: + return false; + default: + return true; + } + case 16: + switch (kaddr) { + case 0x1f801040: + case 0x1f801044: + case 0x1f801048: + case 0x1f80104a: + case 0x1f80104e: + case 0x1f801050: + case 0x1f801054: + case 0x1f80105a: + case 0x1f80105e: + case 0x1f801100: + case 0x1f801104: + case 0x1f801108: + case 0x1f801110: + case 0x1f801114: + case 0x1f801118: + case 0x1f801120: + case 0x1f801124: + case 0x1f801128: + return false; + case 0x1f801070: + case 0x1f801074: + return !is_write; + default: + return is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00; + } + default: + switch (kaddr) { + case 0x1f801040: + case 0x1f801050: + case 0x1f801100: + case 0x1f801104: + case 0x1f801108: + case 0x1f801110: + case 0x1f801114: + case 0x1f801118: + case 0x1f801120: + case 0x1f801124: + case 0x1f801128: + case 0x1f801810: + case 0x1f801814: + case 0x1f801820: + case 0x1f801824: + return false; + case 0x1f801070: + case 0x1f801074: + case 0x1f801088: + case 0x1f801098: + case 0x1f8010a8: + case 0x1f8010b8: 
+ case 0x1f8010c8: + case 0x1f8010e8: + case 0x1f8010f4: + return !is_write; + default: + return !is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00; + } + } +} + static const struct lightrec_ops lightrec_ops = { .cop2_op = cop2_op, .enable_ram = lightrec_enable_ram, + .hw_direct = lightrec_can_hw_direct, }; static int lightrec_plugin_init(void) @@ -321,11 +396,10 @@ static int lightrec_plugin_init(void) lightrec_map[PSX_MAP_MIRROR1].address = psxM + 0x200000; lightrec_map[PSX_MAP_MIRROR2].address = psxM + 0x400000; lightrec_map[PSX_MAP_MIRROR3].address = psxM + 0x600000; + lightrec_map[PSX_MAP_HW_REGISTERS].address = psxH + 0x1000; lightrec_map[PSX_MAP_CODE_BUFFER].address = code_buffer; } - lightrec_debug = !!getenv("LIGHTREC_DEBUG"); - lightrec_very_debug = !!getenv("LIGHTREC_VERY_DEBUG"); use_lightrec_interpreter = !!getenv("LIGHTREC_INTERPRETER"); if (getenv("LIGHTREC_BEGIN_CYCLES")) lightrec_begin_cycles = (unsigned int) strtol( @@ -347,90 +421,6 @@ static int lightrec_plugin_init(void) return 0; } -static u32 hash_calculate_le(const void *buffer, u32 count) -{ - unsigned int i; - u32 *data = (u32 *) buffer; - u32 hash = 0xffffffff; - - count /= 4; - for(i = 0; i < count; ++i) { - hash += LE32TOH(data[i]); - hash += (hash << 10); - hash ^= (hash >> 6); - } - - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -static u32 hash_calculate(const void *buffer, u32 count) -{ - unsigned int i; - u32 *data = (u32 *) buffer; - u32 hash = 0xffffffff; - - count /= 4; - for(i = 0; i < count; ++i) { - hash += data[i]; - hash += (hash << 10); - hash ^= (hash >> 6); - } - - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; -} - -static const char * const mips_regs[] = { - "zero", - "at", - "v0", "v1", - "a0", "a1", "a2", "a3", - "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", - "t8", "t9", - "k0", "k1", - "gp", "sp", "fp", "ra", - "lo", "hi", -}; - 
-static void print_for_big_ass_debugger(void) -{ - unsigned int i; - - printf("CYCLE 0x%08x PC 0x%08x", psxRegs.cycle, psxRegs.pc); - - if (lightrec_very_debug) - printf(" RAM 0x%08x SCRATCH 0x%08x HW 0x%08x", - hash_calculate_le(psxM, 0x200000), - hash_calculate_le(psxH, 0x400), - hash_calculate_le(psxH + 0x1000, 0x2000)); - - printf(" CP0 0x%08x CP2D 0x%08x CP2C 0x%08x INT 0x%04x INTCYCLE 0x%08x GPU 0x%08x", - hash_calculate(&psxRegs.CP0.r, - sizeof(psxRegs.CP0.r)), - hash_calculate(&psxRegs.CP2D.r, - sizeof(psxRegs.CP2D.r)), - hash_calculate(&psxRegs.CP2C.r, - sizeof(psxRegs.CP2C.r)), - psxRegs.interrupt, - hash_calculate(psxRegs.intCycle, - sizeof(psxRegs.intCycle)), - LE32TOH(HW_GPU_STATUS)); - - if (lightrec_very_debug) - for (i = 0; i < 34; i++) - printf(" %s 0x%08x", mips_regs[i], psxRegs.GPR.r[i]); - else - printf(" GPR 0x%08x", hash_calculate(&psxRegs.GPR.r, - sizeof(psxRegs.GPR.r))); - printf("\n"); -} - static void lightrec_dump_regs(struct lightrec_state *state) { struct lightrec_registers *regs = lightrec_get_registers(state); @@ -462,22 +452,24 @@ static void lightrec_plugin_execute_block(void) gen_interupt(); + // step during early boot so that 0x80030000 fastboot hack works + if (booting) + next_interupt = psxRegs.cycle; + if (use_pcsx_interpreter) { intExecuteBlock(); } else { lightrec_reset_cycle_count(lightrec_state, psxRegs.cycle); lightrec_restore_regs(lightrec_state); - if (unlikely(use_lightrec_interpreter)) + if (unlikely(use_lightrec_interpreter)) { psxRegs.pc = lightrec_run_interpreter(lightrec_state, - psxRegs.pc); - // step during early boot so that 0x80030000 fastboot hack works - else if (unlikely(booting || lightrec_debug)) - psxRegs.pc = lightrec_execute_one(lightrec_state, - psxRegs.pc); - else + psxRegs.pc, + next_interupt); + } else { psxRegs.pc = lightrec_execute(lightrec_state, psxRegs.pc, next_interupt); + } psxRegs.cycle = lightrec_current_cycle_count(lightrec_state); @@ -497,10 +489,6 @@ static void 
lightrec_plugin_execute_block(void) booting = false; } - if (lightrec_debug && psxRegs.cycle >= lightrec_begin_cycles - && psxRegs.pc != old_pc) - print_for_big_ass_debugger(); - if ((psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x300) && (psxRegs.CP0.n.Status & 0x1)) { /* Handle software interrupts */ -- 2.39.2