Update lightrec 20220910 (#686)
authorPaul Cercueil <paul@crapouillou.net>
Sun, 11 Sep 2022 09:12:45 +0000 (11:12 +0200)
committerGitHub <noreply@github.com>
Sun, 11 Sep 2022 09:12:45 +0000 (11:12 +0200)
* git subrepo pull --force deps/lightning

subrepo:
  subdir:   "deps/lightning"
  merged:   "b1dfc564e2"
upstream:
  origin:   "https://github.com/pcercuei/gnu_lightning.git"
  branch:   "pcsx_rearmed"
  commit:   "b1dfc564e2"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

* include: update lightning.h

Update lightning.h with a copy generated from the latest master.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
* git subrepo pull --force deps/lightrec

subrepo:
  subdir:   "deps/lightrec"
  merged:   "e122276183"
upstream:
  origin:   "https://github.com/pcercuei/lightrec.git"
  branch:   "master"
  commit:   "e122276183"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

* lightrec: Update to latest Lightrec API

Remove the debug features since they aren't really useful in the
libretro core.

Update the glue code to use the updated API functions; and implement
lightrec_can_hw_direct() for a slight performance increase.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
66 files changed:
deps/lightning/.gitrepo
deps/lightning/ChangeLog
deps/lightning/check/Makefile.am
deps/lightning/check/catomic.c [new file with mode: 0644]
deps/lightning/check/catomic.ok [new file with mode: 0644]
deps/lightning/check/lightning.c
deps/lightning/configure.ac
deps/lightning/doc/body.texi
deps/lightning/include/lightning.h.in
deps/lightning/include/lightning/jit_private.h
deps/lightning/lib/jit_aarch64-cpu.c
deps/lightning/lib/jit_aarch64-sz.c
deps/lightning/lib/jit_aarch64.c
deps/lightning/lib/jit_alpha-cpu.c
deps/lightning/lib/jit_alpha-sz.c
deps/lightning/lib/jit_alpha.c
deps/lightning/lib/jit_arm-cpu.c
deps/lightning/lib/jit_arm-sz.c
deps/lightning/lib/jit_arm.c
deps/lightning/lib/jit_disasm.c
deps/lightning/lib/jit_fallback.c [new file with mode: 0644]
deps/lightning/lib/jit_hppa-cpu.c
deps/lightning/lib/jit_hppa-sz.c
deps/lightning/lib/jit_hppa.c
deps/lightning/lib/jit_ia64-cpu.c
deps/lightning/lib/jit_ia64-sz.c
deps/lightning/lib/jit_ia64.c
deps/lightning/lib/jit_mips-cpu.c
deps/lightning/lib/jit_mips-sz.c
deps/lightning/lib/jit_mips.c
deps/lightning/lib/jit_names.c
deps/lightning/lib/jit_ppc-cpu.c
deps/lightning/lib/jit_ppc-fpu.c
deps/lightning/lib/jit_ppc-sz.c
deps/lightning/lib/jit_ppc.c
deps/lightning/lib/jit_print.c
deps/lightning/lib/jit_riscv-cpu.c
deps/lightning/lib/jit_riscv-sz.c
deps/lightning/lib/jit_riscv.c
deps/lightning/lib/jit_s390-cpu.c
deps/lightning/lib/jit_s390-sz.c
deps/lightning/lib/jit_s390.c
deps/lightning/lib/jit_sparc-cpu.c
deps/lightning/lib/jit_sparc-sz.c
deps/lightning/lib/jit_sparc.c
deps/lightning/lib/jit_x86-cpu.c
deps/lightning/lib/jit_x86-sz.c
deps/lightning/lib/jit_x86.c
deps/lightning/lib/lightning.c
deps/lightrec/.gitrepo
deps/lightrec/README.md
deps/lightrec/blockcache.c
deps/lightrec/disassembler.c
deps/lightrec/disassembler.h
deps/lightrec/emitter.c
deps/lightrec/interpreter.c
deps/lightrec/lightrec-private.h
deps/lightrec/lightrec.c
deps/lightrec/lightrec.h
deps/lightrec/optimizer.c
deps/lightrec/reaper.c
deps/lightrec/recompiler.c
deps/lightrec/regcache.c
deps/lightrec/regcache.h
include/lightning/lightning.h
libpcsxcore/lightrec/plugin.c

index 420b6e2..e1611ab 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/gnu_lightning.git
        branch = pcsx_rearmed
-       commit = 7fce9abb2a6bfc3967b4e5705794e617ed909402
-       parent = 94d482f4b7f5da2c5af7e3590b770261f907f185
+       commit = b1dfc564e2327621d15e688911a398c3a729bd82
+       parent = 7393802c34796806043533cd379e5bcbd66cfd54
        method = merge
        cmdver = 0.4.3
index a842040..7fe5c7c 100644 (file)
@@ -1,3 +1,43 @@
+2022-09-08 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_fallback.c: Implement fallback compare and swap with
+       pthreads.
+       * check/Makefile.am: Update for new cas{r,i} simple test.
+       * check/catomic.c, check/catomic.ok: New test case for
+       simple compare and swap atomic operation.
+       * check/lightning.c: Add entries to be able to use
+       the new compare and swap atomic operation. Still missing
+       a general test, only the basic C version.
+       * include/lightning.h.in: Include pthread.h, even if not
+       needing a fallback compare and swap.
+       * include/lightning/jit_private.h: Add support for a register pair
+       in second argument. Required by the new casr and casi operations.
+       * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+       lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_x86-cpu.c,
+       lib/jit_x86-sz.c, lib/jit_x86.c: Implement inline code for compare
+       and swap.
+       * lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c: Implement
+       inline code for compare and swap if cpu is armv7, otherwise, use
+       a fallback with pthreads.
+       * lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c,
+       lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+       lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+       lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+       lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c,
+       lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c,
+       lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c: Implement
+       fallback compare and swap with pthreads. At least some of these
+       should be updated for inline code generation.
+       * lib/jit_names.c, lib/jit_print.c, lib/lightning.c: Update for the
+       new compare and swap operation.
+       * doc/body.texi: Add simple documentation of the compare and swap
+       new operation.
+
+2022-08-12  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       Document jit_align.
+       * doc/body.texi: Add documentation for jit_align.
+
 2022-05-14 Paulo Andrade <pcpa@gnu.org>
 
        * include/lightning.h.in: Reorder jit_mov{n,z}r in instruction list.
index fc9f232..3cc54d1 100644 (file)
@@ -16,7 +16,8 @@
 
 AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
 
-check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list
+check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
+       catomic
 
 lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 lightning_SOURCES = lightning.c
@@ -42,6 +43,9 @@ carg_SOURCES = carg.c
 cva_list_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 cva_list_SOURCES = cva_list.c
 
+catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB)
+catomic_SOURCES = catomic.c
+
 $(top_builddir)/lib/liblightning.la:
        cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
 
@@ -319,7 +323,7 @@ $(nodata_TESTS):    check.nodata.sh
 TESTS += $(nodata_TESTS)
 endif
 
-TESTS += ccall self setcode nodata ctramp carg cva_list
+TESTS += ccall self setcode nodata ctramp carg cva_list catomic
 CLEANFILES = $(TESTS)
 
 #TESTS_ENVIRONMENT=$(srcdir)/run-test;
diff --git a/deps/lightning/check/catomic.c b/deps/lightning/check/catomic.c
new file mode 100644 (file)
index 0000000..04a2f89
--- /dev/null
@@ -0,0 +1,144 @@
+#include <lightning.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+
+void alarm_handler(int unused)
+{   /* SIGALRM handler installed by main(); the signal number is ignored */
+    _exit(1);   /* terminate at once with a failing status instead of hanging */
+}
+
+int
+main(int argc, char *argv[])
+{   /* Builds four JIT thread functions that contend on one lock via casr/casi, then a JIT "main" that spawns and joins them */
+    jit_state_t                *_jit;
+    void               (*code)(void);
+    jit_node_t          *jmpi_main, *label;
+    jit_node_t          *func0, *func1, *func2, *func3;
+    jit_node_t          *patch0, *patch1, *patch2, *patch3;
+    jit_word_t           lock;  /* lock word; its address is baked into the generated code */
+    pthread_t            tids[4];
+
+    /* If there is any bug, do not hang in "make check" */
+    signal(SIGALRM, alarm_handler);
+    alarm(5);
+  
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    jmpi_main = jit_jmpi();    /* jump over the thread functions; patched to the "main" entry below */
+
+#define defun(name, line)                                      \
+    jit_name(#name);                                           \
+    jit_note("catomic.c", line);                               \
+    name = jit_label();                                                \
+     jit_prolog();                                             \
+    jit_movi(JIT_V0, (jit_word_t)&lock);                       \
+    jit_movi(JIT_R1, 0);                                       \
+    jit_movi(JIT_R2, line);                                    \
+    /* spin until get the lock */                              \
+    label = jit_label();                                       \
+    jit_casr(JIT_R0, JIT_V0, JIT_R1, JIT_R2);                  \
+    jit_patch_at(jit_beqi(JIT_R0, 0), label);                  \
+    /* lock acquired */                                                \
+    jit_prepare();                                             \
+    /* pretend to be doing something useful for 0.01 sec
+     * (10000 usec) while holding the lock */                  \
+    jit_pushargi(10000);                                       \
+    jit_finishi(usleep);                                       \
+    /* release lock */                                         \
+    jit_movi(JIT_R1, 0);                                       \
+    jit_str(JIT_V0, JIT_R1);                                   \
+    /* Now test casi */                                                \
+    jit_movi(JIT_R1, 0);                                       \
+    jit_movi(JIT_R2, line);                                    \
+    /* spin until get the lock */                              \
+    label = jit_label();                                       \
+    jit_casi(JIT_R0, (jit_word_t)&lock, JIT_R1, JIT_R2);       \
+    jit_patch_at(jit_beqi(JIT_R0, 0), label);                  \
+    /* lock acquired */                                                \
+    jit_prepare();                                             \
+    /* pretend to be doing something useful for 0.01 sec
+     * (10000 usec) while holding the lock */                  \
+    jit_pushargi(10000);                                       \
+    jit_finishi(usleep);                                       \
+    jit_prepare();                                             \
+    /* for make check, just print "ok" */                      \
+    jit_pushargi((jit_word_t)"ok");                            \
+    /*jit_pushargi((jit_word_t)#name);*/                       \
+    jit_finishi(puts);                                         \
+    /* release lock */                                         \
+    jit_movi(JIT_R1, 0);                                       \
+    jit_str(JIT_V0, JIT_R1);                                   \
+    jit_ret();                                                 \
+    jit_epilog();
+    defun(func0, __LINE__);
+    defun(func1, __LINE__);
+    defun(func2, __LINE__);
+    defun(func3, __LINE__);
+
+    jit_patch(jmpi_main);
+    jit_name("main");
+    jit_note("catomic.c", __LINE__);
+    jit_prolog();
+
+#define start(tid)                                             \
+    /* set JIT_R0 to thread function */                                \
+    jit_patch_at(jit_movi(JIT_R0, 0), func##tid);              \
+    jit_prepare();                                             \
+    /* pthread_t first argument */                             \
+    jit_pushargi((jit_word_t)(tids + tid));                    \
+    /* pthread_attr_t second argument */                       \
+    jit_pushargi((jit_word_t)NULL);                            \
+    /* start routine third argument */                         \
+    jit_pushargr(JIT_R0);                                      \
+    /* argument to start routine fourth argument */            \
+    jit_pushargi((jit_word_t)NULL);                            \
+    /* start thread */                                         \
+    jit_finishi(pthread_create);
+    /* spawn four threads */
+    start(0);
+    start(1);
+    start(2);
+    start(3);
+
+#define join(tid)                                              \
+    /* load pthread_t value in JIT_R0 */                       \
+    jit_movi(JIT_R0, (jit_word_t)tids);                                \
+    jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t));         \
+    jit_prepare();                                             \
+    jit_pushargr(JIT_R0);                                      \
+    jit_pushargi((jit_word_t)NULL);                            \
+    jit_finishi(pthread_join);
+    /* wait for threads to finish */
+    join(0);
+    join(1);
+    join(2);
+    join(3);
+
+    jit_prepare();
+    jit_pushargi((jit_word_t)"ok");
+    jit_finishi(puts);
+
+    jit_ret();
+    jit_epilog();
+
+    code = jit_emit();
+
+#if 1
+    jit_disassemble();
+#endif
+
+    jit_clear_state();
+
+    /* let first thread acquire the lock */
+    lock = 0;
+    
+    (*code)();         /* run the generated "main": spawn, join, print the final "ok" */
+    jit_destroy_state();
+
+    finish_jit();
+
+    return (0);
+}
diff --git a/deps/lightning/check/catomic.ok b/deps/lightning/check/catomic.ok
new file mode 100644 (file)
index 0000000..b130552
--- /dev/null
@@ -0,0 +1,5 @@
+ok
+ok
+ok
+ok
+ok
index 3cf3e70..34b5440 100644 (file)
@@ -316,6 +316,7 @@ static void ger_u(void);    static void gei_u(void);
 static void gtr(void);         static void gti(void);
 static void gtr_u(void);       static void gti_u(void);
 static void ner(void);         static void nei(void);
+static void casr(void);                static void casi(void);
 static void movr(void);                static void movi(void);
 static void extr_c(void);      static void extr_uc(void);
 static void extr_s(void);      static void extr_us(void);
@@ -636,6 +637,7 @@ static instr_t                instr_vector[] = {
     entry(gtr),                entry(gti),
     entry(gtr_u),      entry(gti_u),
     entry(ner),                entry(nei),
+    entry(casr),       entry(casi),
     entry(movr),       entry(movi),
     entry(extr_c),     entry(extr_uc),
     entry(extr_s),     entry(extr_us),
@@ -1028,6 +1030,16 @@ name(void)                                                               \
     jit_word_t im = get_imm();                                         \
     jit_##name(r0, r1, r2, im);                                                \
 }
+#define entry_ir_im_ir_ir(name)                                                \
+static void                                                            \
+name(void)                                                             \
+{   /* reads operands in order ireg, imm, ireg, ireg (used for casi) */        \
+    jit_gpr_t  r0 = get_ireg();                                        \
+    jit_word_t im = get_imm();                                         \
+    jit_gpr_t r1 = get_ireg(), r2 = get_ireg();                                \
+    jit_##name(r0, im, r1, r2);        /* forward to the jit_<name> macro */   \
+}
+
 #define entry_ir_ir(name)                                              \
 static void                                                            \
 name(void)                                                             \
@@ -1443,6 +1455,7 @@ entry_ir_ir_ir(ger_u)             entry_ir_ir_im(gei_u)
 entry_ir_ir_ir(gtr)            entry_ir_ir_im(gti)
 entry_ir_ir_ir(gtr_u)          entry_ir_ir_im(gti_u)
 entry_ir_ir_ir(ner)            entry_ir_ir_im(nei)
+entry_ir_ir_ir_ir(casr)                entry_ir_im_ir_ir(casi)
 entry_ir_ir(movr)
 static void
 movi(void)
index 5b582d2..8200651 100644 (file)
@@ -136,10 +136,22 @@ if test "x$DISASSEMBLER" != "xno"; then
                return 0;
        }
     )], [ac_cv_test_new_disassembler=no],,)
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+       #include <dis-asm.h>
+       int main(int argc, char *argv[])
+       {
+               struct disassemble_info dinfo;
+               INIT_DISASSEMBLE_INFO(dinfo, NULL, NULL, NULL);
+               return 0;
+       }
+    )], [ac_cv_test_new_disassemble_info=yes],[ac_cv_test_new_disassemble_info=no],)
     CFLAGS="$save_CFLAGS"
     if test "x$ac_cv_test_new_disassembler" != "xno"; then
        LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_29=1"
     fi
+    if test "x$ac_cv_test_new_disassemble_info" != "xno"; then
+       LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_38=1"
+    fi
 fi
 
 AC_ARG_ENABLE(devel-disassembler,
index c174fcf..1d8d277 100644 (file)
@@ -597,6 +597,12 @@ forward   (not specified)                @r{forward label}
 indirect  (not specified)                @r{special simple label}
 @end example
 
+The following instruction is used to specify a minimal alignment for
+the next instruction, usually with a label:
+@example
+align     (not specified)                @r{align code}
+@end example
+
 @code{label} is normally used as @code{patch_at} argument for backward
 jumps.
 
@@ -649,6 +655,38 @@ that automatically binds the implicit label added by @code{patch} with
 the @code{movi}, but on some special conditions it is required to create
 an "unbound" label.
 
+@code{align} is useful for creating multiple entry points to a
+(trampoline) function that are all accessible through a single
+function pointer.  @code{align} receives an integer argument that
+defines the minimal alignment of the address of a label directly
+following the @code{align} instruction.  The integer argument must be
+a power of two and the effective alignment will be a power of two no
+less than the argument to @code{align}.  If the argument to
+@code{align} is 16 or more, the effective alignment will match the
+specified minimal alignment exactly.
+
+@example
+          jit_node_t *forward, *label1, *label2, *jump;
+          unsigned char *addr1, *addr2;
+forward = jit_forward();
+          jit_align(16);
+label1  = jit_indirect();                @rem{/* first entry point */}
+jump    = jit_jmpi();                    @rem{/* jump to first handler */}
+          jit_patch_at(jump, forward);
+          jit_align(16);
+label2  = jit_indirect();                @rem{/* second entry point */}
+          ...                            @rem{/* second handler */}
+          jit_jmpr(...);
+          jit_link(forward);
+          ...                            @rem{/* first handler */}
+          jit_jmpr(...);
+          ...
+          jit_emit();
+          addr1 = jit_address(label1);
+          addr2 = jit_address(label2);
+          assert(addr2 - addr1 == 16);   @rem{/* only one of the addresses needs to be remembered */}
+@end example
+
 @item Function prolog
 
 These macros are used to set up a function prolog.  The @code{allocai}
@@ -890,6 +928,34 @@ to save and load the values when making function calls.
 @code{pointer_p} expects a pointer argument, and will return non
 zero if the pointer is inside the generated jit code. Must be
 called after @code{jit_emit} and before @code{jit_destroy_state}.
+
+@item Atomic operations
+Only compare-and-swap is implemented. It accepts four operands;
+the second can be an immediate.
+
+The first argument is set to a boolean value indicating whether the
+operation succeeded.
+
+All arguments must be different registers; the result register cannot
+also be used to pass an argument.
+
+The second argument is the address of a machine word.
+
+The third argument is the old value.
+
+The fourth argument is the new value.
+
+@example
+casr                                  O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0
+casi                                  O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0
+@end example
+
+If the value at the address in the second argument is equal to the third
+argument, the address value is atomically modified to the value of the
+fourth argument and the first argument is set to a non zero value.
+
+If the value at the address in the second argument is not equal to the
+third argument nothing is done and the first argument is set to zero.
 @end table
 
 @node GNU lightning examples
index 887a951..6f8ee03 100644 (file)
@@ -24,6 +24,7 @@
 #include <stdlib.h>
 @MAYBE_INCLUDE_STDINT_H@
 #include <string.h>
+#include <pthread.h>
 
 #if defined(__hpux) && defined(__hppa__)
 #  include <machine/param.h>
@@ -913,6 +914,10 @@ typedef enum {
 #define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
 #endif
 
+    jit_code_casr,             jit_code_casi,
+#define jit_casr(u, v, w, x)   jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x)   jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
     jit_code_last_code
 } jit_code_t;
 
@@ -1081,6 +1086,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t,
 extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
                                     jit_int32_t, jit_int32_t,
                                     jit_word_t, jit_word_t);
+#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h)
+extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
+                                    jit_word_t, jit_word_t,
+                                    jit_int32_t, jit_int32_t);
 #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
 extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
                                     jit_word_t, jit_word_t, jit_float32_t);
index 0af24cb..4925a86 100644 (file)
@@ -276,6 +276,7 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
 #define jit_cc_a2_int          0x00100000      /* arg2 is immediate word */
 #define jit_cc_a2_flt          0x00200000      /* arg2 is immediate float */
 #define jit_cc_a2_dbl          0x00400000      /* arg2 is immediate double */
+#define jit_cc_a2_rlh          0x00800000      /* arg2 is a register pair */
 
 #if __ia64__ || (__sparc__ && __WORDSIZE == 64)
 extern void
index 7d2a99d..7572be7 100644 (file)
@@ -318,6 +318,8 @@ typedef union {
 #  define A64_LDRSB                    0x38e06800
 #  define A64_STR                      0xf8206800
 #  define A64_LDR                      0xf8606800
+#  define A64_LDAXR                    0xc85ffc00
+#  define A64_STLXR                    0xc800fc00
 #  define A64_STRH                     0x78206800
 #  define A64_LDRH                     0x78606800
 #  define A64_LDRSH                    0x78a06800
@@ -445,6 +447,8 @@ typedef union {
 #  define LDR(Rt,Rn,Rm)                        oxxx(A64_LDR,Rt,Rn,Rm)
 #  define LDRI(Rt,Rn,Imm12)            oxxi(A64_LDRI,Rt,Rn,Imm12)
 #  define LDUR(Rt,Rn,Imm9)             oxx9(A64_LDUR,Rt,Rn,Imm9)
+#  define LDAXR(Rt,Rn)                 o_xx(A64_LDAXR,Rt,Rn)
+#  define STLXR(Rs,Rt,Rn)              oxxx(A64_STLXR,Rs,Rn,Rt)
 #  define STRB(Rt,Rn,Rm)               oxxx(A64_STRB,Rt,Rn,Rm)
 #  define STRBI(Rt,Rn,Imm12)           oxxi(A64_STRBI,Rt,Rn,Imm12)
 #  define STURB(Rt,Rn,Imm9)            oxx9(A64_STURB,Rt,Rn,Imm9)
@@ -674,6 +678,11 @@ static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define extr_us(r0,r1)               UXTH(r0,r1)
 #  define extr_i(r0,r1)                        SXTW(r0,r1)
 #  define extr_ui(r0,r1)               UXTW(r0,r1)
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define movr(r0,r1)                  _movr(_jit,r0,r1)
 static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movi(r0,i0)                  _movi(_jit,r0,i0)
@@ -1826,6 +1835,32 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     }
 }
 
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{   /* r0 = (*r1 == r2) ? (*r1 = r3, 1) : 0; r1 == _NOREG selects the casi form with address i0 */
+    jit_int32_t                r1_reg, iscasi;
+    jit_word_t         retry, done, jump0, jump1;
+    if ((iscasi = (r1 == _NOREG))) {   /* casi: load the immediate address into a scratch register */
+       r1_reg = jit_get_reg(jit_class_gpr);
+       r1 = rn(r1_reg);
+       movi(r1, i0);
+    }
+    /* retry: exclusive load of the current value at [r1] into r0 */
+    retry = _jit->pc.w;
+    LDAXR(r0, r1);
+    jump0 = bner(_jit->pc.w, r0, r2);  /* bne done r0 r2 */
+    STLXR(r3, r0, r1);                 /* macro puts its first operand in the data slot: store r3, status in r0 */
+    jump1 = bnei(_jit->pc.w, r0, 0);   /* bnei retry r0 0 */
+    /* done: the mismatch branch must land on CSET so that r0 becomes 0, not the loaded value */
+    done = _jit->pc.w;
+    CSET(r0, CC_EQ);                   /* 1 only when the store went through (last compare equal) */
+    patch_at(jump0, done);
+    patch_at(jump1, retry);
+    if (iscasi)
+       jit_unget_reg(r1_reg);
+}
+
 static void
 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
index e1f6d96..90c8774 100644 (file)
     8, /* bswapr_us */
     8, /* bswapr_ui */
     4, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index f0be046..dadf76e 100644 (file)
@@ -1137,6 +1137,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _us);
                case_rr(ext, _i);
                case_rr(ext, _ui);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rr(mov,);
                case_rrr(movn,);
                case_rrr(movz,);
index 2dd701d..3809aa3 100644 (file)
@@ -315,6 +315,9 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
 #  define negr(r0,r1)                  NEGQ(r1,r0)
 #  define comr(r0,r1)                  NOT(r1,r0)
 #  define addr(r0,r1,r2)               ADDQ(r1,r2,r0)
@@ -827,6 +830,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{   /* compare-and-swap entry point for this backend; no inline sequence is emitted here */
+    fallback_casx(r0, r1, r2, r3, i0); /* delegate to the shared fallback (pthread-based per the ChangeLog) */
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
index ecfeba3..9653e35 100644 (file)
     16,        /* bswapr_us */
     36,        /* bswapr_ui */
     36,        /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index d7bb3ec..1a78b90 100644 (file)
@@ -64,6 +64,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_alpha-cpu.c"
 #  include "jit_alpha-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1095,6 +1096,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _us);
                case_rr(ext, _i);
                case_rr(ext, _ui);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1503,6 +1512,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_alpha-cpu.c"
 #  include "jit_alpha-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 14ba36b..91bb17c 100644 (file)
@@ -36,6 +36,7 @@
 #  define jit_armv5_p()                        (jit_cpu.version >= 5)
 #  define jit_armv5e_p()               (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
 #  define jit_armv6_p()                        (jit_cpu.version >= 6)
+#  define jit_armv7_p()                        (jit_cpu.version >= 7)
 #  define jit_armv7r_p()               0
 #  define stack_framesize              48
 extern int     __aeabi_idivmod(int, int);
@@ -179,7 +180,23 @@ extern unsigned    __aeabi_uidivmod(unsigned, unsigned);
 #  define ARM_XTR8                     0x00000400 /* ?xt? rotate 8 bits */
 #  define ARM_XTR16                    0x00000800 /* ?xt? rotate 16 bits */
 #  define ARM_XTR24                    0x00000c00 /* ?xt? rotate 24 bits */
+#  define ARM_LDREX                    0x01900090
+#  define THUMB2_LDREX                 0xe8500000
+#  define ARM_STREX                    0x01800090
+#  define THUMB2_STREX                 0xe8400000
 /* << ARMv6* */
+/* >> ARMv7 */
+#  define ARM_DMB                      0xf57ff050
+#  define THUMB2_DMB                   0xf3bf8f50
+#  define DMB_SY                       0xf
+#  define DMB_ST                       0xe
+#  define DMB_ISH                      0xb
+#  define DMB_ISHST                    0xa
+#  define DMB_NSH                      0x7
+#  define DMB_NSHT                     0x6
+#  define DMB_OSH                      0x3
+#  define DMB_OSHST                    0x2
+/* << ARMv7 */
 #  define ARM_SHIFT                    0x01a00000
 #  define ARM_R                                0x00000010 /* register shift */
 #  define ARM_LSL                      0x00000000
@@ -399,6 +416,12 @@ static void _tcit(jit_state_t*,unsigned int,int);
 static void _tpp(jit_state_t*,int,int);
 #  define torl(o,rn,im)                        _torl(_jit,o,rn,im)
 static void _torl(jit_state_t*,int,int,int) maybe_unused;
+#  define DMB(im)                      dmb(im)
+#  define T2_DMB(im)                   tdmb(im)
+#  define dmb(im)                      _dmb(_jit, im)
+static void _dmb(jit_state_t *_jit, int im);
+#  define tdmb(im)                     _tdmb(_jit, im)
+static void _tdmb(jit_state_t *_jit, int im);
 #  define CC_MOV(cc,rd,rm)             corrr(cc,ARM_MOV,0,rd,rm)
 #  define MOV(rd,rm)                   CC_MOV(ARM_CC_AL,rd,rm)
 #  define T1_MOV(rd,rm)                        is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7))
@@ -718,6 +741,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused;
 #  define CC_LDRDIN(cc,rt,rn,im)       corri8(cc,ARM_LDRDI,rn,rt,im)
 #  define LDRDIN(rt,rn,im)             CC_LDRDIN(ARM_CC_AL,rt,rn,im)
 #  define T2_LDRDIN(rt,rt2,rn,im)      torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
+#  define CC_LDREX(cc,rt,rn)           corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
+#  define LDREX(rt,rn)                 CC_LDREX(ARM_CC_AL,rt,rn)
+#  define T2_LDREX(rt,rn,im)           torrri8(THUMB2_LDREX,rn,rt,0xf,im)
 #  define CC_STRB(cc,rt,rn,rm)         corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
 #  define STRB(rt,rn,rm)               CC_STRB(ARM_CC_AL,rt,rn,rm)
 #  define T1_STRB(rt,rn,rm)            is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
@@ -771,6 +797,9 @@ static void _torl(jit_state_t*,int,int,int) maybe_unused;
 #  define CC_STRDIN(cc,rt,rn,im)       corri8(cc,ARM_STRDI,rn,rt,im)
 #  define STRDIN(rt,rn,im)             CC_STRDIN(ARM_CC_AL,rt,rn,im)
 #  define T2_STRDIN(rt,rt2,rn,im)      torrri8(THUMB2_STRDI,rn,rt,rt2,im)
+#  define CC_STREX(cc,rd,rt,rn)                corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
+#  define STREX(rd,rt,rn)              CC_STREX(ARM_CC_AL,rd,rt,rn)
+#  define T2_STREX(rd,rt,rn,im)                torrri8(THUMB2_STREX,rn,rt,rd,im)
 #  define CC_LDMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
 #  define LDMIA(rn,im)                 CC_LDMIA(ARM_CC_AL,rn,im)
 #  define CC_LDM(cc,rn,im)             CC_LDMIA(cc,rn,im)
@@ -847,6 +876,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define comr(r0,r1)                  _comr(_jit,r0,r1)
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  _negr(_jit,r0,r1)
@@ -1508,6 +1542,22 @@ _torl(jit_state_t *_jit, int o, int rn, int im)
     iss(thumb.s[0], thumb.s[1]);
 }
 
+/* Emit a DMB barrier using the ARM_DMB encoding.  `im' selects the
+ * barrier option and may only use the low four bits. */
+static void
+_dmb(jit_state_t *_jit, int im)
+{
+    assert((im & ~0xf) == 0);
+    ii(ARM_DMB | im);
+}
+
+/* Emit a DMB barrier using the THUMB2_DMB encoding.  `im' selects the
+ * barrier option and may only use the low four bits; the instruction
+ * is handed to iss() as the two `s' halves of a jit_thumb_t. */
+static void
+_tdmb(jit_state_t *_jit, int im)
+{
+    jit_thumb_t        insn;
+
+    assert((im & ~0xf) == 0);
+    insn.i = THUMB2_DMB | im;
+    iss(insn.s[0], insn.s[1]);
+}
+
 static void
 _nop(jit_state_t *_jit, jit_int32_t i0)
 {
@@ -1610,6 +1660,55 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     _movznr(_jit, ARM_CC_EQ, r0, r1, r2);
 }
 
+/*
+ * Emit a compare-and-swap on the word addressed by r1, or by the
+ * immediate i0 when r1 is _NOREG (the casi form, for which a scratch
+ * register is borrowed to hold the address):
+ *
+ *     r0 = (*addr == r2) ? (*addr = r3, 1) : 0
+ *
+ * Without ARMv7 the generic fallback_casx() is emitted.  Otherwise an
+ * LDREX/STREX retry loop bracketed by DMB ISH barriers is generated,
+ * in Thumb-2 or ARM encoding depending on jit_thumb_p().
+ *
+ * The loaded value is reduced to a boolean with eqr() before the
+ * branch to `done', so that r0 is 0 on a mismatch; branching with the
+ * raw loaded value and inverting it afterwards would leave
+ * (loaded ^ 1) in r0 instead.
+ */
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+    jit_int32_t                r1_reg, iscasi;
+    jit_word_t         retry, done, jump0, jump1;
+    if (!jit_armv7_p())
+       fallback_casx(r0, r1, r2, r3, i0);
+    else {
+       if ((iscasi = (r1 == _NOREG))) {
+           r1_reg = jit_get_reg(jit_class_gpr);
+           r1 = rn(r1_reg);
+           movi(r1, i0);
+       }
+       if (jit_thumb_p()) {
+           T2_DMB(DMB_ISH);
+           /* retry: */
+           retry = _jit->pc.w;
+           T2_LDREX(r0, r1, 0);
+           /* r0 = 1 if the loaded value equals r2, 0 otherwise */
+           eqr(r0, r0, r2);
+           jump0 = bnei(_jit->pc.w, r0, 1);    /* bnei done r0 1 */
+           T2_STREX(r0, r3, r1, 0);
+           jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
+           /* STREX left r0 = 0 (memory updated): turn it into 1 */
+           xori(r0, r0, 1);
+           /* done: reached with r0 = 0 on mismatch, 1 after a swap */
+           done = _jit->pc.w;
+           T2_DMB(DMB_ISH);
+       }
+       else {
+           DMB(DMB_ISH);
+           /* retry: */
+           retry = _jit->pc.w;
+           LDREX(r0, r1);
+           /* r0 = 1 if the loaded value equals r2, 0 otherwise */
+           eqr(r0, r0, r2);
+           jump0 = bnei(_jit->pc.w, r0, 1);    /* bnei done r0 1 */
+           STREX(r0, r3, r1);
+           jump1 = bnei(_jit->pc.w, r0, 0);    /* bnei retry r0 0 */
+           /* STREX left r0 = 0 (memory updated): turn it into 1 */
+           xori(r0, r0, 1);
+           /* done: reached with r0 = 0 on mismatch, 1 after a swap */
+           done = _jit->pc.w;
+           DMB(DMB_ISH);
+       }
+       patch_at(arm_patch_jump, jump0, done);
+       patch_at(arm_patch_jump, jump1, retry);
+       if (iscasi)
+           jit_unget_reg(r1_reg);
+    }
+}
+
 static void
 _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
index 293d306..7997009 100644 (file)
     8, /* bswapr_us */
     4, /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 
     20,        /* bswapr_us */
     16,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
index 0fdd1a7..ae0e9f5 100644 (file)
@@ -90,6 +90,7 @@ extern void __clear_cache(void *, void *);
 #  include "jit_arm-cpu.c"
 #  include "jit_arm-swf.c"
 #  include "jit_arm-vfp.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1504,6 +1505,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _us);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rr(mov,);
                case_rrr(movn,);
                case_rrr(movz,);
@@ -2003,6 +2012,7 @@ _emit_code(jit_state_t *_jit)
 #  include "jit_arm-cpu.c"
 #  include "jit_arm-swf.c"
 #  include "jit_arm-vfp.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 25983a6..856a70b 100644 (file)
@@ -53,6 +53,20 @@ static jit_state_t            *disasm_jit;
 static FILE                     *disasm_stream;
 #endif
 
+#if BINUTILS_2_38
+/*
+ * Styled-printf callback passed to INIT_DISASSEMBLE_INFO alongside
+ * plain fprintf.  The style is ignored; the text is written to
+ * `stream', which is the same FILE * (disasm_stream) the plain
+ * fprintf callback receives, so both callbacks write to one
+ * destination.  Returns the vfprintf() result.
+ */
+static int fprintf_styled(void *stream, enum disassembler_style style,
+                          const char *fmt, ...)
+{
+  va_list args;
+  int r;
+
+  (void)style;
+  va_start(args, fmt);
+  r = vfprintf((FILE *)stream, fmt, args);
+  va_end(args);
+
+  return r;
+}
+#endif
+
 /*
  * Implementation
  */
@@ -77,7 +91,11 @@ jit_init_debug(const char *progname)
     if (!disasm_stream)
        disasm_stream = stdout;
 
+#if BINUTILS_2_38
+    INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf, fprintf_styled);
+#else
     INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf);
+#endif
     disasm_info.arch = bfd_get_arch(disasm_bfd);
     disasm_info.mach = bfd_get_mach(disasm_bfd);
 
diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c
new file mode 100644 (file)
index 0000000..9251947
--- /dev/null
@@ -0,0 +1,177 @@
+#if PROTO /* declarations only; the including backend defines PROTO around this include */
+#define fallback_save(r0)              _fallback_save(_jit, r0)
+static void _fallback_save(jit_state_t*, jit_int32_t);	/* store one register to its regoff[] slot */
+#define fallback_load(r0)              _fallback_load(_jit, r0)
+static void _fallback_load(jit_state_t*, jit_int32_t);	/* reload one register from its regoff[] slot */
+#define fallback_save_regs(r0)         _fallback_save_regs(_jit, r0)
+static void _fallback_save_regs(jit_state_t*, jit_int32_t);	/* spill non-jit_class_sav JIT_R (except r0) and JIT_F registers */
+#define fallback_load_regs(r0)         _fallback_load_regs(_jit, r0)
+static void _fallback_load_regs(jit_state_t*, jit_int32_t);	/* reload what _fallback_save_regs spilled */
+#define fallback_calli(i0, i1)         _fallback_calli(_jit, i0, i1)
+static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t);	/* call i0 after loading i1 into a target-specific register */
+#define fallback_casx(r0,r1,r2,r3,im)  _fallback_casx(_jit,r0,r1,r2,r3,im)
+static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
+                          jit_int32_t, jit_int32_t, jit_word_t);	/* mutex-guarded compare-and-swap */
+#endif /* PROTO */
+
+#if CODE
+/*
+ * Emit a store of register number r0 into its regoff[] frame slot,
+ * addressed from JIT_FP.  The first JIT_R_NUM entries of _rvs are
+ * searched for r0; nothing is emitted when r0 is not found or when
+ * its entry is flagged jit_class_sav.
+ * NOTE(review): regoff[] is indexed by the search position here but by
+ * JIT_R(offset) in _fallback_save_regs() -- confirm the two agree.
+ */
+static void
+_fallback_save(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                idx;
+
+    for (idx = 0; idx < JIT_R_NUM; idx++) {
+        jit_int32_t            flags = _rvs[idx].spec;
+        jit_int32_t            hwreg = jit_regno(flags);
+
+        if (hwreg != r0)
+            continue;
+        if (!(flags & jit_class_sav))
+            stxi(_jitc->function->regoff[idx], rn(JIT_FP), hwreg);
+        return;
+    }
+}
+
+/*
+ * Emit a load of register number r0 from its regoff[] frame slot,
+ * addressed from JIT_FP.  The first JIT_R_NUM entries of _rvs are
+ * searched for r0; nothing is emitted when r0 is not found or when
+ * its entry is flagged jit_class_sav.
+ * NOTE(review): regoff[] is indexed by the search position here but by
+ * JIT_R(offset) in _fallback_save_regs() -- confirm the two agree.
+ */
+static void
+_fallback_load(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                idx;
+
+    for (idx = 0; idx < JIT_R_NUM; idx++) {
+        jit_int32_t            flags = _rvs[idx].spec;
+        jit_int32_t            hwreg = jit_regno(flags);
+
+        if (hwreg != r0)
+            continue;
+        if (!(flags & jit_class_sav))
+            ldxi(hwreg, rn(JIT_FP), _jitc->function->regoff[idx]);
+        return;
+    }
+}
+
+/*
+ * Emit stores spilling every JIT_R() register (except the general
+ * purpose register r0) and every JIT_F() register that is not flagged
+ * jit_class_sav into its regoff[] frame slot.  A slot is allocated
+ * with jit_allocai() the first time it is needed, in which case
+ * _jitc->again is set; every spilled register is also recorded in
+ * _jitc->regsav.
+ *
+ * Float slots are sized for a jit_float64_t because they are written
+ * with emit_stxi_d(); a jit_word_t sized slot is too small for that
+ * store when __WORDSIZE == 32.
+ */
+static void
+_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                offset, regno, spec;
+    for (offset = 0; offset < JIT_R_NUM; offset++) {
+       regno = JIT_R(offset);
+       spec =  _rvs[regno].spec;
+       /* r0 receives the result, its old value need not survive */
+       if ((spec & jit_class_gpr) && regno == r0)
+           continue;
+       if (!(spec & jit_class_sav)) {
+           if (!_jitc->function->regoff[regno]) {
+               _jitc->function->regoff[regno] =
+                   jit_allocai(sizeof(jit_word_t));
+               _jitc->again = 1;
+           }
+           jit_regset_setbit(&_jitc->regsav, regno);
+           emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno);
+       }
+    }
+    /* If knew for certain float registers are not used by
+     * pthread_mutex_lock and pthread_mutex_unlock, could skip this */
+    for (offset = 0; offset < JIT_F_NUM; offset++) {
+       regno = JIT_F(offset);
+       spec =  _rvs[regno].spec;
+       if (!(spec & jit_class_sav)) {
+           if (!_jitc->function->regoff[regno]) {
+               /* room for the double written by emit_stxi_d() */
+               _jitc->function->regoff[regno] =
+                   jit_allocai(sizeof(jit_float64_t));
+               _jitc->again = 1;
+           }
+           jit_regset_setbit(&_jitc->regsav, regno);
+           emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno);
+       }
+    }
+}
+
+/*
+ * Emit loads restoring, from their regoff[] frame slots, every
+ * JIT_R() register (except the general purpose register r0) and every
+ * JIT_F() register that is not flagged jit_class_sav.  Each restored
+ * register is cleared from _jitc->regsav.
+ */
+static void
+_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                idx, reg, flags;
+
+    for (idx = 0; idx < JIT_R_NUM; idx++) {
+        reg = JIT_R(idx);
+        flags = _rvs[reg].spec;
+        if ((flags & jit_class_gpr) && reg == r0)
+            continue;
+        if (flags & jit_class_sav)
+            continue;
+        jit_regset_clrbit(&_jitc->regsav, reg);
+        emit_ldxi(reg, JIT_FP, _jitc->function->regoff[reg]);
+    }
+    /* Float registers are restored as well, since it is not known
+     * whether pthread_mutex_lock and pthread_mutex_unlock use them */
+    for (idx = 0; idx < JIT_F_NUM; idx++) {
+        reg = JIT_F(idx);
+        flags = _rvs[reg].spec;
+        if (flags & jit_class_sav)
+            continue;
+        jit_regset_clrbit(&_jitc->regsav, reg);
+        emit_ldxi_d(reg, JIT_FP, _jitc->function->regoff[reg]);
+    }
+}
+
+static void	/* Emit a call to address i0.  On the targets listed below
+		 * i1 is first loaded into a target-specific register
+		 * (presumably the first argument register -- the callers
+		 * in this file pass the mutex address as i1); on any other
+		 * target only the call is emitted. */
+_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1)
+{
+#  if defined(__mips__)
+    movi(rn(_A0), i1);
+#  elif defined(__arm__)
+    movi(rn(_R0), i1);
+#  elif defined(__sparc__)
+    movi(rn(_O0), i1);
+#  elif defined(__ia64__)
+    /* avoid confusion with pushargi patching */
+    if (i1 >= -2097152 && i1 <= 2097151)	/* i1 fits a signed 22-bit value */
+       MOVI(_jitc->rout, i1);
+    else
+       MOVL(_jitc->rout, i1);
+#  elif defined(__hppa__)
+    movi(_R26_REGNO, i1);
+#  elif defined(__s390__) || defined(__s390x__)
+    movi(rn(_R2), i1);
+#  elif defined(__alpha__)
+    movi(rn(_A0), i1);
+#  elif defined(__riscv__)	/* NOTE(review): compilers usually predefine __riscv, not __riscv__ -- confirm this branch is reachable */
+    movi(rn(JIT_RA0), i1);
+#  endif
+    calli(i0);
+}
+
+static void	/* Mutex-based compare-and-swap.  Emits code that, between
+		 * calls to pthread_mutex_lock and pthread_mutex_unlock on a
+		 * single static mutex, loads the word at r1 (or at address
+		 * i0 when r1 is _NOREG), compares it with r2 using eqr()
+		 * and, when equal, stores r3 to that address.  r0 ends up
+		 * holding the eqr() result. */
+_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+              jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+    jit_int32_t                r1_reg, iscasi;
+    jit_word_t         jump, done;
+    /* XXX only attempts to fallback cas for lightning jit code */
+    static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+    if ((iscasi = r1 == _NOREG)) {	/* casi form: put the address in a scratch register */
+       r1_reg = jit_get_reg(jit_class_gpr);
+       r1 = rn(r1_reg);
+       movi(r1, i0);
+    }
+    fallback_save_regs(r0);	/* spill registers before calling out */
+    fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&mutex);
+    fallback_load(r1);	/* reload from the spill slot; presumably the call may clobber it */
+    ldr(r0, r1);
+    fallback_load(r2);
+    eqr(r0, r0, r2);
+    fallback_save(r0);	/* keep the comparison result across the unlock call */
+    jump = bnei(_jit->pc.w, r0, 1);	/* skip the store when the values differ */
+    fallback_load(r3);
+#  if __WORDSIZE == 32
+    str_i(r1, r3);
+#  else
+    str_l(r1, r3);
+#  endif
+    /* done: */
+    done = _jit->pc.w;
+    fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
+    fallback_load(r0);	/* bring back the result saved before the unlock call */
+#  if defined(__arm__)
+    patch_at(arm_patch_jump, jump, done);
+#  else
+    patch_at(jump, done);
+#  endif
+    fallback_load_regs(r0);	/* restore everything spilled above except r0 */
+    if (iscasi)
+       jit_unget_reg(r1_reg);
+}
+#endif
index 6ca54f3..155ec91 100644 (file)
@@ -652,6 +652,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #define comr(r0,r1)            UADDCM(_R0_REGNO,r1,r0)
 #define negr(r0,r1)            SUB(_R0_REGNO,r1,r0)
 #define extr_c(r0,r1)          EXTRWR(r1,31,8,r0)
@@ -1651,6 +1656,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+/* This backend emits no native compare-and-swap sequence: both casr
+ * and casi (r1 == _NOREG, address in i0) are forwarded unchanged to
+ * fallback_casx(). */
+static void
+_casx(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3,
+      jit_word_t i0)
+{
+    fallback_casx(r0, r1, r2, r3, i0);
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
index 1bfb7e6..e984bac 100644 (file)
     36,        /* bswapr_us */
     80,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index 2668842..b994571 100644 (file)
@@ -25,6 +25,7 @@
 #define PROTO                          1
 #  include "jit_hppa-cpu.c"
 #  include "jit_hppa-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1028,6 +1029,14 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rrr(movn,);
                case_rrr(movz,);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
@@ -1459,6 +1468,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_hppa-cpu.c"
 #  include "jit_hppa-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 63bb92d..b28e8f1 100644 (file)
@@ -1311,6 +1311,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
@@ -3499,6 +3504,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+/* This backend emits no native compare-and-swap sequence: both casr
+ * and casi (r1 == _NOREG, address in i0) are forwarded unchanged to
+ * fallback_casx(). */
+static void
+_casx(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3,
+      jit_word_t i0)
+{
+    fallback_casx(r0, r1, r2, r3, i0);
+}
+
 static void
 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
index c81b3ea..020349d 100644 (file)
     48,        /* bswapr_us */
     48,        /* bswapr_ui */
     16,        /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index 8b4cd00..5664762 100644 (file)
@@ -52,6 +52,7 @@ extern void __clear_cache(void *, void *);
 #define PROTO                          1
 #  include "jit_ia64-cpu.c"
 #  include "jit_ia64-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1175,6 +1176,14 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1693,6 +1702,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_ia64-cpu.c"
 #  include "jit_ia64-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 0625589..0862592 100644 (file)
@@ -522,6 +522,11 @@ static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movnr(r0,r1,r2)              MOVN(r0, r1, r2)
 #  define movzr(r0,r1,r2)              MOVZ(r0, r1, r2)
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define ldr_c(r0,r1)                 LB(r0,0,r1)
 #  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
@@ -1328,6 +1333,13 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
+/* This backend emits no native compare-and-swap sequence: both casr
+ * and casi (r1 == _NOREG, address in i0) are forwarded unchanged to
+ * fallback_casx(). */
+static void
+_casx(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3,
+      jit_word_t i0)
+{
+    fallback_casx(r0, r1, r2, r3, i0);
+}
+
 static void
 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -2931,6 +2943,32 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
 static void
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
+    if (((_jit->pc.w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+        if (can_sign_extend_short_p(i0)) {
+            JAL((i0 & ~0xf0000000) >> 2);
+            addiu(_T9_REGNO, _ZERO_REGNO, i0);
+            return;
+        }
+
+        if (can_zero_extend_short_p(i0)) {
+            JAL((i0 & ~0xf0000000) >> 2);
+            ORI(_T9_REGNO, _ZERO_REGNO, i0);
+            return;
+        }
+
+        if (can_sign_extend_int_p(i0)) {
+            if (i0 & 0xffff) {
+                LUI(_T9_REGNO, i0 >> 16);
+                JAL((i0 & ~0xf0000000) >> 2);
+                ORI(_T9_REGNO, _T9_REGNO, i0);
+            } else {
+                JAL((i0 & ~0xf0000000) >> 2);
+                LUI(_T9_REGNO, i0 >> 16);
+            }
+            return;
+        }
+    }
+
     movi(_T9_REGNO, i0);
     JALR(_T9_REGNO);
     NOP(1);
index b4642fa..25f0712 100644 (file)
     20,        /* bswapr_us */
     52,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* NEW_ABI */
 #endif /* __WORDSIZE */
 
     20,        /* bswapr_us */
     52,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* NEW_ABI */
 #endif /* __WORDSIZE */
 
     20,        /* bswapr_us */
     52,        /* bswapr_ui */
     116,       /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index 94fe797..ecf025d 100644 (file)
@@ -67,6 +67,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1073,8 +1074,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
-    jit_movr(_T9, r0);
-    call = jit_callr(_T9);
+    call = jit_callr(r0);
     call->v.w = _jitc->function->self.argi;
 #if NEW_ABI
     call->w.w = call->v.w;
@@ -1433,6 +1433,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _i);
                case_rr(ext, _ui);
 #endif
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1875,6 +1883,7 @@ _emit_code(jit_state_t *_jit)
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index ebd3d56..664adff 100644 (file)
@@ -230,4 +230,5 @@ static char *code_name[] = {
     "movr_d_w",                "movi_d_w",
     "bswapr_us",
     "bswapr_ui",               "bswapr_ul",
+    "casr",            "casi",
 };
index cab085f..ef47f9a 100644 (file)
@@ -260,7 +260,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
 #  define LHAU(d,a,s)                  FDs(43,d,a,s)
 #  define LHAUX(d,a,b)                 FX(31,d,a,b,375)
 #  define LHAX(d,a,b)                  FX(31,d,a,b,343)
-#  define LHRBX(d,a,b)                 FX(31,d,a,b,790)
+#  define LHBRX(d,a,b)                 FX(31,d,a,b,790)
 #  define LHZ(d,a,s)                   FDs(40,d,a,s)
 #  define LHZU(d,a,s)                  FDs(41,d,a,s)
 #  define LHZUX(d,a,b)                 FX(31,d,a,b,311)
@@ -271,6 +271,7 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
 #  define LSWI(d,a,n)                  FX(31,d,a,n,597)
 #  define LSWX(d,a,b)                  FX(31,d,a,b,533)
 #  define LWARX(d,a,b)                 FX(31,d,a,b,20)
+#  define LDARX(d,a,b)                 FX(31,d,a,b,84)
 #  define LWBRX(d,a,b)                 FX(31,d,a,b,534)
 #  define LWA(d,a,s)                   FDs(58,d,a,s|2)
 #  define LWAUX(d,a,b)                 FX(31,d,a,b,373)
@@ -446,6 +447,7 @@ static void _MCRXR(jit_state_t*, jit_int32_t);
 #  define STW(s,a,d)                   FDs(36,s,a,d)
 #  define STWBRX(s,a,b)                        FX(31,s,a,b,662)
 #  define STWCX_(s,a,b)                        FX_(31,s,a,b,150)
+#  define STDCX_(s,a,b)                        FX_(31,s,a,b,214)
 #  define STWU(s,a,d)                  FDs(37,s,a,d)
 #  define STWUX(s,a,b)                 FX(31,s,a,b,183)
 #  define STWX(s,a,b)                  FX(31,s,a,b,151)
@@ -511,6 +513,11 @@ static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  NOT(r0,r1)
 #  define extr_c(r0,r1)                        EXTSB(r0,r1)
@@ -521,10 +528,12 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 #    define extr_i(r0,r1)              EXTSW(r0,r1)
 #    define extr_ui(r0,r1)             CLRLDI(r0,r1,32)
 #  endif
-#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
-static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
-static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_us_lh(r0,r1,no_flag)  _bswapr_us(_jit,r0,r1,no_flag)
+#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1,0)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
+#  define bswapr_ui_lw(r0,r1,no_flag)  _bswapr_ui(_jit,r0,r1,no_flag)
+#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1,0)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
 #  if __WORDSIZE == 64
 #    define bswapr_ul(r0,r1)           generic_bswapr_ul(_jit,r0,r1)
 #  endif
@@ -1148,8 +1157,70 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 }
 
 static void
-_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)	/* Emit a compare-and-swap
+							 * loop on the word at r1
+							 * (or at address i0 when
+							 * r1 is _NOREG): load with
+							 * LWARX/LDARX, compare to
+							 * r2, conditionally store
+							 * r3 with STWCX_/STDCX_. */
 {
+    jit_int32_t                r1_reg, iscasi;
+    jit_word_t         retry, done, jump0, jump1;
+    if ((iscasi = (r1 == _NOREG))) {	/* casi form: put the address in a scratch register */
+       r1_reg = jit_get_reg(jit_class_gpr);
+       r1 = rn(r1_reg);
+       movi(r1, i0);
+    }
+    SYNC();
+    /* retry: */
+    retry = _jit->pc.w;
+#  if __WORDSIZE == 32
+    LWARX(r0, _R0_REGNO, r1);
+#  else
+    LDARX(r0, _R0_REGNO, r1);
+#  endif
+    jump0 = bner(_jit->pc.w, r0, r2);  /* bne done r0 r2 */
+#  if __WORDSIZE == 32
+    STWCX_(r3, _R0_REGNO, r1);
+#  else
+    STDCX_(r3, _R0_REGNO, r1);
+#  endif
+    jump1 = bnei(_jit->pc.w, r0, 0);   /* bne retry r0 0
+					 * NOTE(review): the store macros above
+					 * are not given r0, so r0 presumably
+					 * still holds the loaded value here --
+					 * confirm this is the intended retry
+					 * test */
+    /* done: */
+    done = _jit->pc.w;
+    ISYNC();
+    MFCR(r0);	/* NOTE(review): the mutex fallback yields 0 or 1 in r0; confirm what MFCR leaves there */
+    patch_at(jump0, done);
+    patch_at(jump1, retry);
+    if (iscasi)
+       jit_unget_reg(r1_reg);
+}
+
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
+{
+    jit_int32_t                reg, addr_reg;
+
+    /* Convert load followed by bswap to a single instruction */
+    /* FIXME r0 and r1 do not need to be the same, only must check if
+     * r1 was loaded in previous instruction */
+    if (no_flag && r0 == r1) {
+        if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e | r0 << 21)) {
+            /* Convert LHZX to LHBRX */
+            _jit->pc.ui--;
+            LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f);
+            return;
+        }
+
+        if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 | r0 << 21)) {
+            /* Convert LHZ to LHBRX */
+            _jit->pc.ui--;
+            addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
+
+            reg = jit_get_reg(jit_class_gpr);
+            LI(rn(reg), (short)*_jit->pc.ui);
+            LHBRX(r0, rn(reg), addr_reg);
+            jit_unget_reg(reg);
+            return;
+        }
+    }
+
     if (r0 == r1) {
         RLWIMI(r0, r0, 16, 8, 15);
         RLWINM(r0, r0, 24, 16, 31);
@@ -1160,9 +1231,34 @@ _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 }
 
 static void
-_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, addr_reg;
+
+    /* Convert load followed by bswap to a single instruction */
+    /* FIXME r0 and r1 do not need to be the same, only must check if
+     * r1 was loaded in previous instruction */
+    if (no_flag && r0 == r1) {
+        if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e | r0 << 21)) {
+            /* Convert LWZX to LWBRX */
+            _jit->pc.ui--;
+            LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f);
+            return;
+        }
+
+        if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 | r0 << 21)) {
+            /* Convert LWZ to LWBRX */
+            _jit->pc.ui--;
+            addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
+
+            reg = jit_get_reg(jit_class_gpr);
+            LI(rn(reg), (short)*_jit->pc.ui);
+            LWBRX(r0, rn(reg), addr_reg);
+            jit_unget_reg(reg);
+            return;
+        }
+    }
+
     reg = jit_get_reg(jit_class_gpr);
     ROTLWI(rn(reg), r1, 8);
     RLWIMI(rn(reg), r1, 24, 0, 7);
@@ -1428,15 +1524,23 @@ _remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     jit_unget_reg(reg);
 }
 
+/* Non-zero when `im' is a single contiguous run of set bits: adding
+ * the lowest set bit to such a value carries through the whole run and
+ * leaves at most one bit set.  The shifted constant is unsigned long
+ * so the shift count returned by __builtin_ctzl() is always in range. */
+#  define is_mask(im)          ((im) ? (__builtin_popcountl((im) + (1UL << __builtin_ctzl(im))) <= 1) : 0)
+
 static void
 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offt;
     if (can_zero_extend_short_p(i0))
        ANDI_(r0, r1, i0);
     else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff))
        ANDIS_(r0, r1, (jit_uword_t)i0 >> 16);
-    else {
+    else if (__WORDSIZE == 32 && is_mask(i0)) {
+       offt = __builtin_ctzl(i0);
+       RLWINM(r0, r1, 0, 32 - offt - __builtin_popcountl(i0), 31 - offt);
+    } else if (__WORDSIZE == 32 && is_mask(~i0)) {
+       offt = __builtin_ctzl(~i0);
+       RLWINM(r0, r1, 0, 32 - offt, 31 - offt - __builtin_popcountl(~i0));
+    } else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        AND(r0, r1, rn(reg));
@@ -3204,10 +3308,13 @@ _calli(jit_state_t *_jit, jit_word_t i0
 {
 #  if _CALL_SYSV
     jit_word_t         d;
-    d = (i0 - _jit->pc.w) & ~3;
-    if (can_sign_extend_jump_p(d))
-       BL(d);
-    else
+    d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
+    if (can_sign_extend_jump_p(d)) {
+        /* Tell double arguments were passed in registers. */
+        if (varargs)
+            CREQV(6, 6, 6);
+        BL(d);
+    } else
 #  endif
     {
        movi(_R12_REGNO, i0);
index 387cc6f..18cc621 100644 (file)
@@ -143,8 +143,17 @@ static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define absr_d(r0,r1)                        FABS(r0,r1)
 #  define negr_f(r0,r1)                        negr_d(r0,r1)
 #  define negr_d(r0,r1)                        FNEG(r0,r1)
-#  define sqrtr_f(r0,r1)               FSQRTS(r0,r1)
-#  define sqrtr_d(r0,r1)               FSQRT(r0,r1)
+#  ifdef _ARCH_PPCSQ
+#    define sqrtr_f(r0,r1)             FSQRTS(r0,r1)
+#    define sqrtr_d(r0,r1)             FSQRT(r0,r1)
+#  else
+extern float sqrtf(float);
+#    define sqrtr_f(r0,r1)             _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+extern double sqrt(double);
+#    define sqrtr_d(r0,r1)             _sqrtr_d(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  endif
 #  define addr_f(r0,r1,r2)             FADDS(r0,r1,r2)
 #  define addr_d(r0,r1,r2)             FADD(r0,r1,r2)
 #  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
@@ -484,23 +493,40 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
        ldi_d(r0, (jit_word_t)i0);
 }
 
-/* should only work on newer ppc (fcfid is a ppc64 instruction) */
 static void
 _extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 #  if __WORDSIZE == 32
-    jit_int32_t                reg;
+    jit_int32_t                reg, freg, off1, off2;
+    /* off1/off2: frame offsets of the most and least significant 32-bit
+     * halves of the 8-byte scratch double at alloca_offset - 8 */
+#  if __BYTE_ORDER == __BIG_ENDIAN
+    off1 = alloca_offset - 8;
+    off2 = alloca_offset - 4;
+#  else
+    off1 = alloca_offset - 4;
+    off2 = alloca_offset - 8;
+#  endif
+
     reg = jit_get_reg(jit_class_gpr);
-    rshi(rn(reg), r1, 31);
-    /* use reserved 8 bytes area */
-    stxi(alloca_offset - 4, _FP_REGNO, r1);
-    stxi(alloca_offset - 8, _FP_REGNO, rn(reg));
+    freg = jit_get_reg(jit_class_fpr);
+    /* Integer to double without FCFID: build the double whose bit
+     * pattern is 0x43300000:0x80000000 (2^52 + 2^31) in freg, then the
+     * double 0x43300000:(r1 ^ 0x80000000) in r0, and subtract. */
+    movi(rn(reg), 0x43300000);
+    stxi_i(off1, _FP_REGNO, rn(reg));
+    movi(rn(reg), 0x80000000);
+    stxi_i(off2, _FP_REGNO, rn(reg));
+    ldxi_d(rn(freg), _FP_REGNO, alloca_offset - 8);	/* freg = bias constant */
+    xorr(rn(reg), r1, rn(reg));	/* reg still holds 0x80000000: flip the sign bit of r1 */
+    stxi_i(off2, _FP_REGNO, rn(reg));	/* replace only the low half */
+    ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
+    subr_d(r0, r0, rn(freg));	/* remove the bias */
+
     jit_unget_reg(reg);
+    jit_unget_reg(freg);
 #  else
     stxi(alloca_offset - 8, _FP_REGNO, r1);
-#  endif
     ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
     FCFID(r0, r0);
+#  endif
 }
 
 static void
@@ -533,6 +559,32 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 }
 #  endif
 
+#  ifndef _ARCH_PPCSQ
+/* Single precision square root for builds without _ARCH_PPCSQ: move
+ * r1 into JIT_FA0, emit a call to the C library sqrtf() and copy
+ * JIT_FRET into r0. */
+static void
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr_f(rn(JIT_FA0), r1);
+#  if _CALL_SYSV
+    calli((jit_word_t)sqrtf, 0);
+#  else
+    calli((jit_word_t)sqrtf);
+#  endif
+    movr_f(r0, rn(JIT_FRET));
+}
+
+/* Double precision square root for builds without _ARCH_PPCSQ: move
+ * r1 into JIT_FA0, emit a call to the C library sqrt() and copy
+ * JIT_FRET into r0. */
+static void
+_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    movr_d(rn(JIT_FA0), r1);
+#  if _CALL_SYSV
+    calli((jit_word_t)sqrt, 0);
+#  else
+    calli((jit_word_t)sqrt);
+#  endif
+    movr_d(r0, rn(JIT_FRET));
+}
+#  endif
+
 #  define fpr_opi(name, type, size)                                    \
 static void                                                            \
 _##name##i_##type(jit_state_t *_jit,                                   \
index 0be7047..9cd006c 100644 (file)
     20,        /* bswapr_us */
     16,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* _CALL_SYV */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
     20,        /* bswapr_us */
     16,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* _CALL_AIX */
 #endif /* __BYTEORDER */
 #endif /* __powerpc__ */
     20,        /* bswapr_us */
     16,        /* bswapr_ui */
     44,        /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __BYTEORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
     20,        /* bswapr_us */
     16,        /* bswapr_ui */
     44,        /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
index e94d1a5..fd6964e 100644 (file)
@@ -1148,6 +1148,8 @@ _emit_code(jit_state_t *_jit)
     jit_word_t          word;
     jit_int32_t                 value;
     jit_int32_t                 offset;
+    jit_bool_t       no_flag = 0;      /* Set if previous instruction is
+                                        * *not* a jump target. */
     struct {
        jit_node_t      *node;
        jit_word_t       word;
@@ -1356,13 +1358,25 @@ _emit_code(jit_state_t *_jit)
 #  if __WORDSIZE == 64
                case_rr(hton, _ul);
 #  endif
-               case_rr(bswap, _us);
-               case_rr(bswap, _ui);
+           case jit_code_bswapr_us:
+               bswapr_us_lh(rn(node->u.w), rn(node->v.w), no_flag);
+               break;
+           case jit_code_bswapr_ui:
+               bswapr_ui_lw(rn(node->u.w), rn(node->v.w), no_flag);
+               break;
 #  if __WORDSIZE == 64
                case_rr(bswap, _ul);
 #  endif
                case_rr(neg,);
                case_rr(com,);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1683,7 +1697,7 @@ _emit_code(jit_state_t *_jit)
                    }
                }
                else
-                   (void)jmpi_p(node->u.w);
+                   jmpi(node->u.w);
                break;
            case jit_code_callr:
                callr(rn(node->u.w)
@@ -1823,6 +1837,8 @@ _emit_code(jit_state_t *_jit)
        assert(_jitc->regarg == 0 && _jitc->synth == 0);
        /* update register live state */
        jit_reglive(node);
+
+        no_flag = !(node->flag & jit_flag_patch);
     }
 #undef case_brf
 #undef case_brw
index 61d9650..ee37b02 100644 (file)
@@ -58,7 +58,7 @@ void
 jit_init_print(void)
 {
     if (!print_stream)
-       print_stream = stderr;
+       print_stream = stdout;
 }
 
 void
@@ -107,7 +107,7 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
        (jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp|
         jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg|
         jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg|
-        jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl);
+        jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl|jit_cc_a2_rlh);
     if (!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) ||
                                           node->code == jit_code_finishr ||
                                           node->code == jit_code_finishi))
@@ -217,6 +217,18 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
            print_chr(' ');     print_reg(node->u.q.h);
            print_str(") ");    print_reg(node->v.w);
            print_chr(' ');     print_hex(node->w.w);   return;
+       r_r_q:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_reg(node->v.w);
+           print_str(" (");    print_reg(node->w.q.l);
+           print_chr(' ');     print_reg(node->w.q.h);
+           print_str(") ");    return;
+       r_w_q:
+           print_chr(' ');     print_reg(node->u.w);
+           print_chr(' ');     print_hex(node->v.w);
+           print_str(" (");    print_reg(node->w.q.l);
+           print_chr(' ');     print_reg(node->w.q.h);
+           print_str(") ");    return;
        r_r_f:
            print_chr(' ');     print_reg(node->u.w);
            print_chr(' ');     print_reg(node->v.w);
@@ -357,6 +369,12 @@ _jit_print_node(jit_state_t *_jit, jit_node_t *node)
                case jit_cc_a0_reg|jit_cc_a0_rlh|
                     jit_cc_a1_reg|jit_cc_a2_int:
                    goto q_r_w;
+               case jit_cc_a0_reg|jit_cc_a1_reg|
+                   jit_cc_a2_reg|jit_cc_a2_rlh:
+                   goto r_r_q;
+               case jit_cc_a0_reg|jit_cc_a1_int|
+                   jit_cc_a2_reg|jit_cc_a2_rlh:
+                   goto r_w_q;
                case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_flt:
                    goto r_r_f;
                case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_dbl:
index 9f029c0..5046fac 100644 (file)
@@ -456,6 +456,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)   /* register form: i0 is passed as 0 */
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)   /* immediate form: r1 is passed as _NOREG */
 #  define ltr(r0, r1, r2)              SLT(r0, r1, r2)
 #  define lti(r0, r1, im)              _lti(_jit, r0, r1, im)
 static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -1339,6 +1344,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{   /* Backend entry point behind the casr/casi macros. */
+    fallback_casx(r0, r1, r2, r3, i0);   /* all operands are forwarded unchanged to fallback_casx() */
+}
+
 static void
 _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
index c8908d8..ea2911f 100644 (file)
     20,        /* bswapr_us */
     52,        /* bswapr_ui */
     116,       /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index 1dc3c9e..966604a 100644 (file)
@@ -34,6 +34,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1134,6 +1135,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _us);
                case_rr(ext, _i);
                case_rr(ext, _ui);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1558,6 +1567,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 619ab15..2c10787 100644 (file)
@@ -973,6 +973,11 @@ static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2)
 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
@@ -2468,6 +2473,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{   /* Backend entry point behind the casr/casi macros. */
+    fallback_casx(r0, r1, r2, r3, i0);   /* all operands are forwarded unchanged to fallback_casx() */
+}
+
 static void
 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
index bb9071d..cea2d44 100644 (file)
     52,        /* bswapr_us */
     128,       /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
     68,        /* bswapr_us */
     160,       /* bswapr_ui */
     344,       /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index 4b89bea..ef0c899 100644 (file)
@@ -88,6 +88,7 @@ extern void __clear_cache(void *, void *);
 #define PROTO                          1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1165,6 +1166,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _i);
                case_rr(ext, _ui);
 #endif
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1558,6 +1567,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 90c3767..ecea506 100644 (file)
@@ -552,6 +552,11 @@ static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define comr(r0, r1)                 XNOR(r1, 0, r0)
 #  define negr(r0, r1)                 NEG(r1, r0)
 #  define addr(r0, r1, r2)             ADD(r1, r2, r0)
@@ -1233,6 +1238,13 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{   /* Backend entry point behind the casr/casi macros. */
+    fallback_casx(r0, r1, r2, r3, i0);   /* all operands are forwarded unchanged to fallback_casx() */
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
index 5ec051d..5e7ef95 100644 (file)
     20,        /* bswapr_us */
     52,        /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
     20,        /* bswapr_us */
     52,        /* bswapr_ui */
     116,       /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __WORDSIZE */
index 23d4442..a677998 100644 (file)
@@ -40,6 +40,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -1477,6 +1478,14 @@ _emit_code(jit_state_t *_jit)
                case_rr(ext, _i);
                case_rr(ext, _ui);
 #endif
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
@@ -1875,6 +1884,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 81534f0..0d8affe 100644 (file)
@@ -369,6 +369,11 @@ static void _movcr_u(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movsr_u(r0, r1)              _movsr_u(_jit, r0, r1)
 static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3)           casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #define movnr(r0, r1, r2)              _movnr(_jit, r0, r1, r2)
 static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #define movzr(r0, r1, r2)              _movzr(_jit, r0, r1, r2)
@@ -2218,6 +2223,66 @@ _movsr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     mrm(0x03, r7(r0), r7(r1));
 }
 
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{   /* r0: result; r1: address register, or _NOREG to use address i0; r2: old value; r3: register operand of the emitted instruction (presumably the new value) */
+    jit_int32_t                save_rax, restore_rax;
+    jit_int32_t                ascasr_reg, ascasr_use;
+    if (r0 != _RAX_REGNO) {            /* result not in %rax */
+       if (r2 != _RAX_REGNO) {         /* old value not in %rax */
+           save_rax = jit_get_reg(jit_class_gpr);   /* scratch register to keep the caller's %rax */
+           movr(rn(save_rax), _RAX_REGNO);
+           restore_rax = 1;
+       }
+       else
+           restore_rax = 0;
+    }
+    else
+       restore_rax = 0;
+    if (r2 != _RAX_REGNO)
+       movr(_RAX_REGNO, r2);   /* the old value is placed in %rax before the instruction */
+    if (r1 == _NOREG) {                        /* using immediate address */
+       if (!can_sign_extend_int_p(i0)) {   /* i0 is not used as an offset here: it is loaded in a register instead */
+           ascasr_reg = jit_get_reg(jit_class_gpr);
+           if (ascasr_reg == _RAX) {   /* %rax already holds the old value: take another register */
+               ascasr_reg = jit_get_reg(jit_class_gpr);
+               jit_unget_reg(_RAX);
+           }
+           ascasr_use = 1;
+           movi(rn(ascasr_reg), i0);
+       }
+       else
+           ascasr_use = 0;
+    }
+    else
+       ascasr_use = 0;
+    ic(0xf0);          /* lock */
+    if (ascasr_use)
+       rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
+    else
+       rex(0, WIDE, r3, _NOREG, r1);
+    ic(0x0f);   /* opcode bytes 0x0f 0xb1: presumably cmpxchg -- confirm against the Intel SDM */
+    ic(0xb1);
+    if (r1 != _NOREG)                  /* casr */
+       rx(r3, 0, r1, _NOREG, _SCL1);
+    else {                             /* casi */
+       if (ascasr_use)
+           rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1);   /* address in reg */
+       else
+           rx(r3, i0, _NOREG, _NOREG, _SCL1);          /* address in offset */
+    }
+    cc(X86_CC_E, r0);   /* presumably sets r0 from the "equal" condition; cc() is defined elsewhere -- confirm */
+    if (r0 != _RAX_REGNO)
+       movr(r0, _RAX_REGNO);   /* NOTE(review): this overwrites r0 right after cc() wrote it -- confirm this is intended */
+    if (restore_rax) {
+       movr(_RAX_REGNO, rn(save_rax));   /* put the caller's %rax back */
+       jit_unget_reg(save_rax);
+    }
+    if (ascasr_use)
+       jit_unget_reg(ascasr_reg);   /* release the address scratch register */
+}
+
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
index bd4b9a0..ff7548a 100644 (file)
     7, /* bswapr_us */
     4, /* bswapr_ui */
     0, /* bswapr_ul */
+    9, /* casr */
+    0, /* casi */
 #endif
 
 #if __X64
     9, /* bswapr_us */
     6, /* bswapr_ui */
     6, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #else
 
 #  if __X64_32
     9, /* bswapr_us */
     6, /* bswapr_ui */
     0, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 
 #  else
 #define JIT_INSTR_MAX 115
     9, /* bswapr_us */
     6, /* bswapr_ui */
     6, /* bswapr_ul */
+    0, /* casr */
+    0, /* casi */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
index e3e1383..fb0b06b 100644 (file)
@@ -1674,6 +1674,14 @@ _emit_code(jit_state_t *_jit)
                case_rrw(gt, _u);
                case_rrr(ne,);
                case_rrw(ne,);
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
                case_rrr(movn,);
                case_rrr(movz,);
                case_rr(mov,);
index b78bd07..e7ce383 100644 (file)
@@ -105,7 +105,7 @@ static jit_bool_t
 _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
 
 #define redundant_store(node, jump)    _redundant_store(_jit, node, jump)
-static void
+static jit_bool_t
 _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump);
 
 #define simplify_movr(p, n, k, s)      _simplify_movr(_jit, p, n, k, s)
@@ -131,7 +131,7 @@ static void
 _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno);
 
 #define simplify()                     _simplify(_jit)
-static void
+static jit_bool_t
 _simplify(jit_state_t *_jit);
 
 #define jit_reg_undef                  -1
@@ -1138,6 +1138,20 @@ _jit_new_node_qww(jit_state_t *_jit, jit_code_t code,
     return (link_node(node));
 }
 
+jit_node_t *
+_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code,
+                 jit_word_t u, jit_word_t v,
+                 jit_int32_t l, jit_int32_t h)
+{   /* Build a node for `code` with word operands u and v and an (l, h) pair as third operand, and return the linked node. */
+    jit_node_t         *node = new_node(code);
+    assert(!_jitc->realize);   /* must not be called once _jitc->realize is set */
+    node->u.w = u;
+    node->v.w = v;
+    node->w.q.l = l;   /* the third operand holds two values: low ... */
+    node->w.q.h = h;   /* ... and high */
+    return (link_node(node));
+}
+
 jit_node_t *
 _jit_new_node_wwf(jit_state_t *_jit, jit_code_t code,
                  jit_word_t u, jit_word_t v, jit_float32_t w)
@@ -1539,6 +1553,14 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_movnr:    case jit_code_movzr:
            mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg;
            break;
+       case jit_code_casr:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|
+                  jit_cc_a2_reg|jit_cc_a2_rlh;
+           break;
+       case jit_code_casi:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|
+                  jit_cc_a2_reg|jit_cc_a2_rlh;
+           break;
        default:
            abort();
     }
@@ -1604,6 +1626,7 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label)
 void
 _jit_optimize(jit_state_t *_jit)
 {
+    jit_int32_t                 pass;
     jit_bool_t          jump;
     jit_bool_t          todo;
     jit_int32_t                 mask;
@@ -1617,6 +1640,9 @@ _jit_optimize(jit_state_t *_jit)
     sequential_labels();
     split_branches();
 
+    pass = 0;
+
+second_pass:
     /* create initial mapping of live register values
      * at the start of a basic block */
     for (offset = 0; offset < _jitc->blocks.offset; offset++) {
@@ -1640,28 +1666,58 @@ _jit_optimize(jit_state_t *_jit)
        }
     } while (todo);
 
-    patch_registers();
-    simplify();
+    if (pass == 0) {
+       todo = 0;
 
-    /* figure out labels that are only reached with a jump
-     * and is required to do a simple redundant_store removal
-     * on jit_beqi below */
-    jump = 1;
-    for (node = _jitc->head; node; node = node->next) {
-       switch (node->code) {
-           case jit_code_label:
-               if (!jump)
-                   node->flag |= jit_flag_head;
-               break;
-           case jit_code_jmpi:         case jit_code_jmpr:
-           case jit_code_epilog:
-               jump = 1;
-               break;
-           case jit_code_data:         case jit_code_note:
-               break;
-           default:
-               jump = 0;
-               break;
+       patch_registers();
+       if (simplify())
+           todo = 1;
+
+       /* figure out labels that are only reached with a jump
+        * and is required to do a simple redundant_store removal
+        * on jit_beqi below */
+       jump = 1;
+       for (node = _jitc->head; node; node = node->next) {
+           switch (node->code) {
+               case jit_code_label:
+                   if (!jump)
+                       node->flag |= jit_flag_head;
+                       break;
+               case jit_code_jmpi:             case jit_code_jmpr:
+               case jit_code_epilog:
+                   jump = 1;
+                   break;
+               case jit_code_data:             case jit_code_note:
+                   break;
+               default:
+                   jump = 0;
+                   break;
+           }
+       }
+
+       for (node = _jitc->head; node; node = node->next) {
+           mask = jit_classify(node->code);
+           if (mask & jit_cc_a0_reg)
+               node->u.w &= ~jit_regno_patch;
+           if (mask & jit_cc_a1_reg)
+               node->v.w &= ~jit_regno_patch;
+           if (mask & jit_cc_a2_reg)
+               node->w.w &= ~jit_regno_patch;
+           if (node->code == jit_code_beqi) {
+               if (redundant_store(node, 1))
+                   todo = 1;
+           }
+           else if (node->code == jit_code_bnei) {
+               if (redundant_store(node, 0))
+                   todo = 1;
+           }
+       }
+
+       /* If instructions were removed, must recompute state at
+        * start of blocks. */
+       if (todo) {
+           pass = 1;
+           goto second_pass;
        }
     }
 
@@ -1673,69 +1729,59 @@ _jit_optimize(jit_state_t *_jit)
            node->v.w &= ~jit_regno_patch;
        if (mask & jit_cc_a2_reg)
            node->w.w &= ~jit_regno_patch;
-       switch (node->code) {
-           case jit_code_prolog:
-               _jitc->function = _jitc->functions.ptr + node->w.w;
-               break;
-           case jit_code_epilog:
-               _jitc->function = NULL;
-               break;
-           case jit_code_beqi:
-               redundant_store(node, 1);
-               break;
-           case jit_code_bnei:
-               redundant_store(node, 0);
-               break;
-           default:
+       if  (node->code == jit_code_prolog)
+           _jitc->function = _jitc->functions.ptr + node->w.w;
+       else if(node->code == jit_code_epilog)
+           _jitc->function = NULL;
+       else {
 #if JIT_HASH_CONSTS
-               if (mask & jit_cc_a0_flt) {
-                   node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a0_dbl) {
-                   node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a1_flt) {
-                   node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a1_dbl) {
-                   node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a2_flt) {
-                   node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
-               else if (mask & jit_cc_a2_dbl) {
-                   node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
-                   node->flag |= jit_flag_node | jit_flag_data;
-               }
+           if (mask & jit_cc_a0_flt) {
+               node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a0_dbl) {
+               node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a1_flt) {
+               node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a1_dbl) {
+               node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a2_flt) {
+               node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
+           else if (mask & jit_cc_a2_dbl) {
+               node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
+               node->flag |= jit_flag_node | jit_flag_data;
+           }
 #endif
-               if (_jitc->function) {
-                   if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
-                       (jit_cc_a0_reg|jit_cc_a0_chg)) {
-                       if (mask & jit_cc_a0_rlh) {
-                           jit_regset_setbit(&_jitc->function->regset,
-                                             jit_regno(node->u.q.l));
-                           jit_regset_setbit(&_jitc->function->regset,
-                                             jit_regno(node->u.q.h));
-                       }
-                       else
-                           jit_regset_setbit(&_jitc->function->regset,
-                                             jit_regno(node->u.w));
-                   }
-                   if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
-                       (jit_cc_a1_reg|jit_cc_a1_chg))
+           if (_jitc->function) {
+               if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
+                   (jit_cc_a0_reg|jit_cc_a0_chg)) {
+                   if (mask & jit_cc_a0_rlh) {
+                       jit_regset_setbit(&_jitc->function->regset,
+                                         jit_regno(node->u.q.l));
                        jit_regset_setbit(&_jitc->function->regset,
-                                         jit_regno(node->v.w));
-                   if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
-                       (jit_cc_a2_reg|jit_cc_a2_chg))
+                                         jit_regno(node->u.q.h));
+                   }
+                   else
                        jit_regset_setbit(&_jitc->function->regset,
-                                         jit_regno(node->w.w));
+                                         jit_regno(node->u.w));
                }
-               break;
+               if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
+                   (jit_cc_a1_reg|jit_cc_a1_chg))
+                   jit_regset_setbit(&_jitc->function->regset,
+                                     jit_regno(node->v.w));
+               if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
+                   (jit_cc_a2_reg|jit_cc_a2_chg))
+                   jit_regset_setbit(&_jitc->function->regset,
+                                     jit_regno(node->w.w));
+           }
        }
     }
 }
@@ -1806,13 +1852,24 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node)
                else
                    jit_regset_setbit(&_jitc->reglive, node->v.w);
            }
-           if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) {
-               if (value & jit_cc_a2_chg) {
-                   jit_regset_clrbit(&_jitc->reglive, node->w.w);
-                   jit_regset_setbit(&_jitc->regmask, node->w.w);
+           if (value & jit_cc_a2_reg) {
+               if (value & jit_cc_a2_rlh) {
+                   /* Assume registers are not changed */
+                   if (!(node->w.q.l & jit_regno_patch))
+                       jit_regset_setbit(&_jitc->reglive, node->w.q.l);
+                   if (!(node->w.q.h & jit_regno_patch))
+                       jit_regset_setbit(&_jitc->reglive, node->w.q.h);
+               }
+               else {
+                   if (!(node->w.w & jit_regno_patch)) {
+                       if (value & jit_cc_a2_chg) {
+                           jit_regset_clrbit(&_jitc->reglive, node->w.w);
+                           jit_regset_setbit(&_jitc->regmask, node->w.w);
+                       }
+                       else
+                           jit_regset_setbit(&_jitc->reglive, node->w.w);
+                   }
                }
-               else
-                   jit_regset_setbit(&_jitc->reglive, node->w.w);
            }
            if (jit_regset_set_p(&_jitc->regmask)) {
                jit_update(node->next, &_jitc->reglive, &_jitc->regmask);
@@ -1843,8 +1900,14 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
     }
     if (value & jit_cc_a1_reg)
        jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
-    if (value & jit_cc_a2_reg)
-       jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+    if (value & jit_cc_a2_reg) {
+       if (value & jit_cc_a2_rlh) {
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l));
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.h));
+       }
+       else
+           jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+    }
 }
 
 void
@@ -1863,8 +1926,14 @@ _jit_regarg_clr(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
     }
     if (value & jit_cc_a1_reg)
        jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
-    if (value & jit_cc_a2_reg)
-       jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+    if (value & jit_cc_a2_reg) {
+       if (value & jit_cc_a2_rlh) {
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l));
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.h));
+       }
+       else
+           jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+    }
 }
 
 void
@@ -2302,11 +2371,26 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
            default:
                value = jit_classify(node->code);
                if (value & jit_cc_a2_reg) {
-                   if (!(node->w.w & jit_regno_patch)) {
-                       if (jit_regset_tstbit(&regmask, node->w.w)) {
-                           jit_regset_clrbit(&regmask, node->w.w);
-                           if (!(value & jit_cc_a2_chg))
-                               jit_regset_setbit(&reglive, node->w.w);
+                   if (value & jit_cc_a2_rlh) {
+                       if (!(node->w.q.l & jit_regno_patch)) {
+                           /* Assume register is not changed */
+                           if (jit_regset_tstbit(&regmask, node->w.q.l))
+                               jit_regset_clrbit(&regmask, node->w.q.l);
+                       }
+                       if (!(node->w.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(&regmask, node->w.q.h))
+                               jit_regset_clrbit(&regmask, node->w.q.h);
+                       }
+                   }
+                   else {
+                       if (value & jit_cc_a2_reg) {
+                           if (!(node->w.w & jit_regno_patch)) {
+                               if (jit_regset_tstbit(&regmask, node->w.w)) {
+                                   jit_regset_clrbit(&regmask, node->w.w);
+                                   if (!(value & jit_cc_a2_chg))
+                                       jit_regset_setbit(&reglive, node->w.w);
+                               }
+                           }
                        }
                    }
                }
@@ -2374,19 +2458,19 @@ _jit_follow(jit_state_t *_jit, jit_block_t *block, jit_bool_t *todo)
                         * means that only JIT_Vn registers can be trusted on
                         * arrival of jmpr.
                         */
+                       jit_regset_set_ui(&regmask, 0);
                        for (regno = 0; regno < _jitc->reglen; regno++) {
                            spec = jit_class(_rvs[regno].spec);
-                           if (jit_regset_tstbit(&regmask, regno) &&
-                               (spec & (jit_class_gpr|jit_class_fpr)) &&
-                               !(spec & jit_class_sav))
-                               jit_regset_clrbit(&regmask, regno);
+                           if ((spec & (jit_class_gpr|jit_class_fpr)) &&
+                               (spec & jit_class_sav))
+                               jit_regset_setbit(&regmask, regno);
                        }
                        /*   Assume non callee save registers are live due
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
-                       jit_regset_ior(&reglive, &reglive, &regmask);
+                       jit_regset_ior(&block->reglive, &reglive, &regmask);
                        /* Treat anything else as dead. */
-                       jit_regset_set_ui(&regmask, 0);
+                       return;
                    }
                }
                break;
@@ -2453,11 +2537,24 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
            default:
                value = jit_classify(node->code);
                if (value & jit_cc_a2_reg) {
-                   if (!(node->w.w & jit_regno_patch)) {
-                       if (jit_regset_tstbit(mask, node->w.w)) {
-                           jit_regset_clrbit(mask, node->w.w);
-                           if (!(value & jit_cc_a2_chg))
-                               jit_regset_setbit(live, node->w.w);
+                   if (value & jit_cc_a2_rlh) {
+                       if (!(node->w.q.l & jit_regno_patch)) {
+                           /* Assume register is not changed */
+                           if (jit_regset_tstbit(mask, node->w.q.l))
+                               jit_regset_clrbit(mask, node->w.q.l);
+                       }
+                       if (!(node->w.q.h & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->w.q.h))
+                               jit_regset_clrbit(mask, node->w.q.h);
+                       }
+                   }
+                   else {
+                       if (!(node->w.w & jit_regno_patch)) {
+                           if (jit_regset_tstbit(mask, node->w.w)) {
+                               jit_regset_clrbit(mask, node->w.w);
+                               if (!(value & jit_cc_a2_chg))
+                                   jit_regset_setbit(live, node->w.w);
+                           }
                        }
                    }
                }
@@ -2522,19 +2619,19 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
                         * means that only JIT_Vn registers can be trusted on
                         * arrival of jmpr.
                         */
+                       jit_regset_set_ui(mask, 0);
                        for (regno = 0; regno < _jitc->reglen; regno++) {
                            spec = jit_class(_rvs[regno].spec);
-                           if (jit_regset_tstbit(mask, regno) &&
-                               (spec & (jit_class_gpr|jit_class_fpr)) &&
-                               !(spec & jit_class_sav))
-                               jit_regset_clrbit(mask, regno);
+                           if ((spec & (jit_class_gpr|jit_class_fpr)) &&
+                               (spec & jit_class_sav))
+                               jit_regset_setbit(mask, regno);
                        }
                        /*   Assume non callee save registers are live due
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
                        jit_regset_ior(live, live, mask);
                        /* Treat anything else as dead. */
-                       jit_regset_set_ui(mask, 0);
+                       return;
                    }
                }
                break;
@@ -2930,7 +3027,7 @@ _reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
     return (0);
 }
 
-static void
+static jit_bool_t
 _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
 {
     jit_node_t         *iter;
@@ -2938,30 +3035,33 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
     jit_word_t          word;
     jit_int32_t                 spec;
     jit_int32_t                 regno;
+    jit_bool_t          result;
 
     if (jump) {
        prev = node->u.n;
        if (prev->code == jit_code_epilog)
-           return;
+           return (0);
        assert(prev->code == jit_code_label);
        if ((prev->flag & jit_flag_head) || node->link || prev->link != node)
            /* multiple sources */
-           return;
+           return (0);
        /* if there are sequential labels it will return below */
     }
     else
        prev = node;
+    result = 0;
     word = node->w.w;
     regno = jit_regno(node->v.w);
     for (iter = prev->next; iter; prev = iter, iter = iter->next) {
        switch (iter->code) {
            case jit_code_label:        case jit_code_prolog:
            case jit_code_epilog:
-               return;
+               return (result);
            case jit_code_movi:
                if (regno == jit_regno(iter->u.w)) {
                    if (iter->flag || iter->v.w != word)
-                       return;
+                       return (result);
+                   result = 1;
                    del_node(prev, iter);
                    iter = prev;
                }
@@ -2969,28 +3069,28 @@ _redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
            default:
                spec = jit_classify(iter->code);
                if (spec & jit_cc_a0_jmp)
-                   return;
+                   return (result);
                if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
                    (jit_cc_a0_reg|jit_cc_a0_chg)) {
                    if (spec & jit_cc_a0_rlh) {
                        if (regno == jit_regno(iter->u.q.l) ||
                            regno == jit_regno(iter->u.q.h))
-                           return;
+                           return (result);
                    }
                    else {
                        if (regno == jit_regno(iter->u.w))
-                           return;
+                           return (result);
                    }
                }
                if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
                    (jit_cc_a1_reg|jit_cc_a1_chg)) {
                    if (regno == jit_regno(iter->v.w))
-                       return;
+                       return (result);
                }
                if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
                    (jit_cc_a2_reg|jit_cc_a2_chg)) {
                    if (regno == jit_regno(iter->w.w))
-                       return;
+                       return (result);
                }
                break;
        }
@@ -3182,7 +3282,7 @@ _simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
  * once to the same value, and is a common pattern of calls
  * to jit_pushargi and jit_pushargr
  */
-static void
+static jit_bool_t
 _simplify(jit_state_t *_jit)
 {
     jit_node_t         *prev;
@@ -3190,7 +3290,9 @@ _simplify(jit_state_t *_jit)
     jit_node_t         *next;
     jit_int32_t                 info;
     jit_int32_t                 regno;
+    jit_bool_t          result;
 
+    result = 0;
     for (prev = NULL, node = _jitc->head; node; prev = node, node = next) {
        next = node->next;
        switch (node->code) {
@@ -3213,6 +3315,7 @@ _simplify(jit_state_t *_jit)
                     * already holding */
                    patch_register(node->link->next, node,
                                   jit_regno_patch|regno, regno);
+                   result = 1;
                    del_node(_jitc->spill[regno], node->link);
                    del_node(prev, node);
                    node = prev;
@@ -3222,38 +3325,50 @@ _simplify(jit_state_t *_jit)
            case jit_code_movr:
                regno = jit_regno(node->u.w);
                if (simplify_movr(prev, node,
-                                 jit_kind_word, sizeof(jit_word_t)))
+                                 jit_kind_word, sizeof(jit_word_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movi:
                regno = jit_regno(node->u.w);
                if (simplify_movi(prev, node,
-                                 jit_kind_word, sizeof(jit_word_t)))
+                                 jit_kind_word, sizeof(jit_word_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movr_f:
                regno = jit_regno(node->u.w);
                if (simplify_movr(prev, node,
-                                 jit_kind_float32, sizeof(jit_float32_t)))
+                                 jit_kind_float32, sizeof(jit_float32_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movi_f:
                regno = jit_regno(node->u.w);
                if (simplify_movi(prev, node,
-                                 jit_kind_float32, sizeof(jit_float32_t)))
+                                 jit_kind_float32, sizeof(jit_float32_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movr_d:
                regno = jit_regno(node->u.w);
                if (simplify_movr(prev, node,
-                                 jit_kind_float64, sizeof(jit_float64_t)))
+                                 jit_kind_float64, sizeof(jit_float64_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_movi_d:
                regno = jit_regno(node->u.w);
                if (simplify_movi(prev, node,
-                                 jit_kind_float64, sizeof(jit_float64_t)))
+                                 jit_kind_float64, sizeof(jit_float64_t))) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_ldxi_c:       case jit_code_ldxi_uc:
            case jit_code_ldxi_s:       case jit_code_ldxi_us:
@@ -3261,15 +3376,19 @@ _simplify(jit_state_t *_jit)
            case jit_code_ldxi_l:
            case jit_code_ldxi_f:       case jit_code_ldxi_d:
                regno = jit_regno(node->u.w);
-               if (simplify_ldxi(prev, node))
+               if (simplify_ldxi(prev, node)) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            case jit_code_stxi_c:       case jit_code_stxi_s:
            case jit_code_stxi_i:       case jit_code_stxi_l:
            case jit_code_stxi_f:       case jit_code_stxi_d:
                regno = jit_regno(node->u.w);
-               if (simplify_stxi(prev, node))
+               if (simplify_stxi(prev, node)) {
+                   result = 1;
                    simplify_spill(node = prev, regno);
+               }
                break;
            default:
                info = jit_classify(node->code);
@@ -3298,13 +3417,29 @@ _simplify(jit_state_t *_jit)
                    ++_jitc->gen[regno];
                }
                if (info & jit_cc_a2_chg) {
-                   regno = jit_regno(node->w.w);
-                   _jitc->values[regno].kind = 0;
-                   ++_jitc->gen[regno];
+#if 0
+                   /* Assume registers are not changed */
+                   if (info & jit_cc_a2_rlh) {
+                       regno = jit_regno(node->w.q.l);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                       regno = jit_regno(node->w.q.h);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+                   }
+                   else {
+#endif
+                       regno = jit_regno(node->w.w);
+                       _jitc->values[regno].kind = 0;
+                       ++_jitc->gen[regno];
+#if 0
+                   }
+#endif
                }
                break;
        }
     }
+    return (result);
 }
 
 static jit_int32_t
@@ -3505,8 +3640,18 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
        }
        if ((value & jit_cc_a1_reg) && node->v.w == regno)
            node->v.w = patch;
-       if ((value & jit_cc_a2_reg) && node->w.w == regno)
-           node->w.w = patch;
+       if (value & jit_cc_a2_reg) {
+           if (value & jit_cc_a2_rlh) {
+               if (node->w.q.l == regno)
+                   node->w.q.l = patch;
+               if (node->w.q.h == regno)
+                   node->w.q.h = patch;
+           }
+           else {
+               if (node->w.w == regno)
+                   node->w.w = patch;
+           }
+       }
     }
 }
 
index 9e55aa9..3f25cc2 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/lightrec.git
        branch = master
-       commit = 7545b5a7995be9e7b70e786a6b534004ea26c999
-       parent = 2fba93f2853c57240f031adb4712acbd2a066d34
+       commit = e1222761836bb478dcec86cf441dcc5514565137
+       parent = eeff1b0a26e4c7f7449640c0bf999e506f538694
        method = merge
        cmdver = 0.4.3
index 40ecc8f..ab2c13b 100644 (file)
@@ -50,4 +50,6 @@ Lightrec has been ported to the following emulators:
 
 * [__pcsx4all__ (my own fork)](https://github.com/pcercuei/pcsx4all)
 
-* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/)
\ No newline at end of file
+* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/)
+
+[![Star History Chart](https://api.star-history.com/svg?repos=pcercuei/lightrec&type=Date)](https://star-history.com/#pcercuei/lightrec&Date)
index 70c5aeb..bb58cdb 100644 (file)
@@ -7,6 +7,8 @@
 #include "debug.h"
 #include "lightrec-private.h"
 #include "memmanager.h"
+#include "reaper.h"
+#include "recompiler.h"
 
 #include <stdbool.h>
 #include <stdlib.h>
@@ -117,6 +119,7 @@ static void lightrec_free_blocks(struct blockcache *cache,
        struct block *block, *next;
        bool outdated = all;
        unsigned int i;
+       u8 old_flags;
 
        for (i = 0; i < LUT_SIZE; i++) {
                for (block = cache->lut[i]; block; block = next) {
@@ -130,7 +133,15 @@ static void lightrec_free_blocks(struct blockcache *cache,
                                        lightrec_block_is_outdated(state, block);
                        }
 
-                       if (outdated) {
+                       if (!outdated)
+                               continue;
+
+                       old_flags = block_set_flags(block, BLOCK_IS_DEAD);
+
+                       if (!(old_flags & BLOCK_IS_DEAD)) {
+                               if (ENABLE_THREADED_COMPILER)
+                                       lightrec_recompiler_remove(state->rec, block);
+
                                pr_debug("Freeing outdated block at PC 0x%08x\n", block->pc);
                                remove_from_code_lut(cache, block);
                                lightrec_unregister_block(cache, block);
@@ -187,11 +198,27 @@ u32 lightrec_calculate_block_hash(const struct block *block)
        return hash;
 }
 
+static void lightrec_reset_lut_offset(struct lightrec_state *state, void *d) /* Reaper callback: re-install the code LUT entry of the block whose PC is carried in 'd' */
+{
+       u32 pc = (u32)(uintptr_t) d; /* 'd' is the block PC cast through uintptr_t by the caller, not a real pointer */
+       struct block *block;
+       void *addr;
+
+       block = lightrec_find_block(state->block_cache, pc);
+       if (!block) /* no block cached for this PC: nothing to restore */
+               return;
+
+       if (block_has_flag(block, BLOCK_IS_DEAD)) /* block is flagged dead: leave its LUT entry untouched */
+               return;
+
+       addr = block->function ?: state->get_next_block; /* GNU "?:": block->function when non-NULL, else get_next_block */
+       lut_write(state, lut_offset(pc), addr);
+}
+
 bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
 {
        u32 offset = lut_offset(block->pc);
        bool outdated;
-       void *addr;
 
        if (lut_read(state, offset))
                return false;
@@ -200,12 +227,24 @@ bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *bloc
        if (likely(!outdated)) {
                /* The block was marked as outdated, but the content is still
                 * the same */
-               if (block->function)
-                       addr = block->function;
-               else
-                       addr = state->get_next_block;
 
-               lut_write(state, offset, addr);
+               if (ENABLE_THREADED_COMPILER) {
+                       /*
+                        * When compiling a block that covers ours, the threaded
+                        * compiler will set the LUT entries of the various
+                        * entry points. Therefore we cannot write the LUT here,
+                        * as we would risk overwriting the new entry points.
+                        * Leave it to the reaper to re-install the LUT entries.
+                        */
+
+                       lightrec_reaper_add(state->reaper,
+                                           lightrec_reset_lut_offset,
+                                           (void *)(uintptr_t) block->pc);
+               } else if (block->function) {
+                       lut_write(state, offset, block->function);
+               } else {
+                       lut_write(state, offset, state->get_next_block);
+               }
        }
 
        return outdated;
index 5c94324..1a217bc 100644 (file)
@@ -11,7 +11,7 @@
 #include "lightrec-private.h"
 #include "regcache.h"
 
-static const char *std_opcodes[] = {
+static const char * const std_opcodes[] = {
        [OP_J]                  = "j       ",
        [OP_JAL]                = "jal     ",
        [OP_BEQ]                = "beq     ",
@@ -42,7 +42,7 @@ static const char *std_opcodes[] = {
        [OP_SWC2]               = "swc2    ",
 };
 
-static const char *special_opcodes[] = {
+static const char * const special_opcodes[] = {
        [OP_SPECIAL_SLL]        = "sll     ",
        [OP_SPECIAL_SRL]        = "srl     ",
        [OP_SPECIAL_SRA]        = "sra     ",
@@ -73,14 +73,14 @@ static const char *special_opcodes[] = {
        [OP_SPECIAL_SLTU]       = "sltu    ",
 };
 
-static const char *regimm_opcodes[] = {
+static const char * const regimm_opcodes[] = {
        [OP_REGIMM_BLTZ]        = "bltz    ",
        [OP_REGIMM_BGEZ]        = "bgez    ",
        [OP_REGIMM_BLTZAL]      = "bltzal  ",
        [OP_REGIMM_BGEZAL]      = "bgezal  ",
 };
 
-static const char *cp0_opcodes[] = {
+static const char * const cp0_opcodes[] = {
        [OP_CP0_MFC0]           = "mfc0    ",
        [OP_CP0_CFC0]           = "cfc0    ",
        [OP_CP0_MTC0]           = "mtc0    ",
@@ -88,38 +88,68 @@ static const char *cp0_opcodes[] = {
        [OP_CP0_RFE]            = "rfe",
 };
 
-static const char *cp2_opcodes[] = {
+static const char * const cp2_basic_opcodes[] = {
        [OP_CP2_BASIC_MFC2]     = "mfc2    ",
        [OP_CP2_BASIC_CFC2]     = "cfc2    ",
        [OP_CP2_BASIC_MTC2]     = "mtc2    ",
        [OP_CP2_BASIC_CTC2]     = "ctc2    ",
 };
 
-static const char *opcode_flags[] = {
+static const char * const cp2_opcodes[] = { /* mnemonics indexed by OP_CP2_* value; the table is sparse, undesignated slots are NULL */
+       [OP_CP2_RTPS]           = "rtps    ",
+       [OP_CP2_NCLIP]          = "nclip   ",
+       [OP_CP2_OP]             = "op      ",
+       [OP_CP2_DPCS]           = "dpcs    ",
+       [OP_CP2_INTPL]          = "intpl   ",
+       [OP_CP2_MVMVA]          = "mvmva   ",
+       [OP_CP2_NCDS]           = "ncds    ",
+       [OP_CP2_CDP]            = "cdp     ",
+       [OP_CP2_NCDT]           = "ncdt    ",
+       [OP_CP2_NCCS]           = "nccs    ",
+       [OP_CP2_CC]             = "cc      ",
+       [OP_CP2_NCS]            = "ncs     ",
+       [OP_CP2_NCT]            = "nct     ",
+       [OP_CP2_SQR]            = "sqr     ",
+       [OP_CP2_DCPL]           = "dcpl    ",
+       [OP_CP2_DPCT]           = "dpct    ",
+       [OP_CP2_AVSZ3]          = "avsz3   ",
+       [OP_CP2_AVSZ4]          = "avsz4   ",
+       [OP_CP2_RTPT]           = "rtpt    ",
+       [OP_CP2_GPF]            = "gpf     ",
+       [OP_CP2_GPL]            = "gpl     ",
+       [OP_CP2_NCCT]           = "ncct    ",
+}; /* NOTE(review): print_op_cp() passes cp2_opcodes[c.r.op] straight to "%s"; a gap would hand it NULL - confirm unlisted values cannot reach it */
+
+static const char * const mult2_opcodes[] = { /* indexed by (c.i.op == OP_META_MULTU2) in print_op(): 0 = mult2, 1 = multu2 */
+       "mult2   ", "multu2  ",
+};
+
+static const char * const opcode_flags[] = {
        "switched branch/DS",
        "sync point",
 };
 
-static const char *opcode_io_flags[] = {
+static const char * const opcode_io_flags[] = {
        "self-modifying code",
        "no invalidation",
        "no mask",
 };
 
-static const char *opcode_io_modes[] = {
+static const char * const opcode_io_modes[] = {
        "Memory access",
        "I/O access",
        "RAM access",
        "BIOS access",
        "Scratchpad access",
+       "Mapped I/O access"
 };
 
-static const char *opcode_branch_flags[] = {
+static const char * const opcode_branch_flags[] = {
        "emulate branch",
        "local branch",
 };
 
-static const char *opcode_multdiv_flags[] = {
+static const char * const opcode_multdiv_flags[] = {
        "No LO",
        "No HI",
        "No div check",
@@ -145,7 +175,7 @@ static const char * const reg_op_token[3] = {
 };
 
 static int print_flags(char *buf, size_t len, const struct opcode *op,
-                      const char **array, size_t array_size,
+                      const char * const *array, size_t array_size,
                       bool is_io)
 {
        const char *flag_name, *io_mode_name;
@@ -223,7 +253,7 @@ static int print_flags(char *buf, size_t len, const struct opcode *op,
 }
 
 static int print_op_special(union code c, char *buf, size_t len,
-                           const char ***flags_ptr, size_t *nb_flags)
+                           const char * const **flags_ptr, size_t *nb_flags)
 {
        switch (c.r.op) {
        case OP_SPECIAL_SLL:
@@ -294,17 +324,14 @@ static int print_op_special(union code c, char *buf, size_t len,
 static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
 {
        if (cp == 2) {
-               switch (c.i.rs) {
-               case OP_CP0_MFC0:
-               case OP_CP0_CFC0:
-               case OP_CP0_MTC0:
-               case OP_CP0_CTC0:
+               switch (c.r.op) {
+               case OP_CP2_BASIC:
                        return snprintf(buf, len, "%s%s,%u",
-                                       cp2_opcodes[c.i.rs],
+                                       cp2_basic_opcodes[c.i.rs],
                                        lightrec_reg_name(c.i.rt),
                                        c.r.rd);
                default:
-                       return snprintf(buf, len, "cp2     (0x%08x)", c.opcode);
+                       return snprintf(buf, len, "%s", cp2_opcodes[c.r.op]);
                }
        } else {
                switch (c.i.rs) {
@@ -325,7 +352,7 @@ static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
 }
 
 static int print_op(union code c, u32 pc, char *buf, size_t len,
-                   const char ***flags_ptr, size_t *nb_flags,
+                   const char * const **flags_ptr, size_t *nb_flags,
                    bool *is_io)
 {
        if (c.opcode == 0)
@@ -429,6 +456,15 @@ static int print_op(union code c, u32 pc, char *buf, size_t len,
                return snprintf(buf, len, "exts    %s,%s",
                                lightrec_reg_name(c.i.rt),
                                lightrec_reg_name(c.i.rs));
+       case OP_META_MULT2:
+       case OP_META_MULTU2:
+               *flags_ptr = opcode_multdiv_flags;
+               *nb_flags = ARRAY_SIZE(opcode_multdiv_flags);
+               return snprintf(buf, len, "%s%s,%s,%s,%u",
+                               mult2_opcodes[c.i.op == OP_META_MULTU2],
+                               lightrec_reg_name(get_mult_div_hi(c)),
+                               lightrec_reg_name(get_mult_div_lo(c)),
+                               lightrec_reg_name(c.r.rs), c.r.op);
        default:
                return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
        }
@@ -437,7 +473,7 @@ static int print_op(union code c, u32 pc, char *buf, size_t len,
 void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr)
 {
        const struct opcode *op;
-       const char **flags_ptr;
+       const char * const *flags_ptr;
        size_t nb_flags, count, count2;
        char buf[256], buf2[256], buf3[256];
        unsigned int i;
index a4fc9f5..e4685a9 100644 (file)
@@ -34,6 +34,7 @@
 #define LIGHTREC_IO_RAM                0x3
 #define LIGHTREC_IO_BIOS       0x4
 #define LIGHTREC_IO_SCRATCH    0x5
+#define LIGHTREC_IO_DIRECT_HW  0x6
 #define LIGHTREC_IO_MASK       LIGHTREC_IO_MODE(0x7)
 #define LIGHTREC_FLAGS_GET_IO_MODE(x) \
        (((x) & LIGHTREC_IO_MASK) >> LIGHTREC_IO_MODE_LSB)
@@ -110,6 +111,9 @@ enum standard_opcodes {
 
        OP_META_EXTC            = 0x17,
        OP_META_EXTS            = 0x18,
+
+       OP_META_MULT2           = 0x19,
+       OP_META_MULTU2          = 0x1a,
 };
 
 enum special_opcodes {
@@ -160,6 +164,28 @@ enum cp0_opcodes {
 
 enum cp2_opcodes {
        OP_CP2_BASIC            = 0x00,
+       OP_CP2_RTPS             = 0x01, /* these values are matched against c.r.op by print_op_cp() and used as indices into the cp2_opcodes[] name table */
+       OP_CP2_NCLIP            = 0x06,
+       OP_CP2_OP               = 0x0c,
+       OP_CP2_DPCS             = 0x10,
+       OP_CP2_INTPL            = 0x11,
+       OP_CP2_MVMVA            = 0x12,
+       OP_CP2_NCDS             = 0x13,
+       OP_CP2_CDP              = 0x14,
+       OP_CP2_NCDT             = 0x16,
+       OP_CP2_NCCS             = 0x1b,
+       OP_CP2_CC               = 0x1c,
+       OP_CP2_NCS              = 0x1e,
+       OP_CP2_NCT              = 0x20,
+       OP_CP2_SQR              = 0x28,
+       OP_CP2_DCPL             = 0x29,
+       OP_CP2_DPCT             = 0x2a,
+       OP_CP2_AVSZ3            = 0x2d,
+       OP_CP2_AVSZ4            = 0x2e,
+       OP_CP2_RTPT             = 0x30,
+       OP_CP2_GPF              = 0x3d,
+       OP_CP2_GPL              = 0x3e,
+       OP_CP2_NCCT             = 0x3f,
 };
 
 enum cp2_basic_opcodes {
@@ -233,6 +259,11 @@ struct opcode {
        u32 flags;
 };
 
+struct opcode_list { /* count header followed by a trailing array of opcodes */
+       u16 nb_ops; /* presumably the number of entries in ops[] - TODO confirm against the code that allocates it */
+       struct opcode ops[]; /* C99 flexible array member: storage must be allocated past the end of the struct */
+};
+
 void lightrec_print_disassembly(const struct block *block, const u32 *code);
 
 static inline _Bool op_flag_no_ds(u32 flags)
index 3af0432..cf32f7a 100644 (file)
@@ -29,6 +29,15 @@ static void unknown_opcode(struct lightrec_cstate *state, const struct block *bl
                block->pc + (offset << 2));
 }
 
+static void
+lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit) /* emit a jump patched to the absolute address state->state->eob_wrapper_func */
+{
+       /* Prevent jit_jmpi() from using our cycles register as a temporary */
+       jit_live(LIGHTREC_REG_CYCLE);
+
+       jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func); /* NOTE(review): _jit looks unused here; presumably the jit_* macros pick it up implicitly - confirm */
+}
+
 static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
                                       const struct block *block, u16 offset,
                                       s8 reg_new_pc, u32 imm, u8 ra_reg,
@@ -39,7 +48,6 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
        const struct opcode *op = &block->opcode_list[offset],
                            *next = &block->opcode_list[offset + 1];
        u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c);
-       u16 offset_after_eob;
 
        jit_note(__FILE__, __LINE__);
 
@@ -76,11 +84,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
                pr_debug("EOB: %u cycles\n", cycles);
        }
 
-       offset_after_eob = offset + 1 +
-               (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
-
-       if (offset_after_eob < block->nb_ops)
-               state->branches[state->nb_branches++] = jit_b();
+       lightrec_jump_to_eob(state, _jit);
 }
 
 void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
@@ -99,7 +103,7 @@ void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
        jit_movi(JIT_V0, block->pc + (offset << 2));
        jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
 
-       state->branches[state->nb_branches++] = jit_b();
+       lightrec_jump_to_eob(state, _jit);
 }
 
 static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
@@ -191,7 +195,7 @@ static void lightrec_do_early_unload(struct lightrec_cstate *state,
 }
 
 static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
-                 jit_code_t code, u32 link, bool unconditional, bool bz)
+                 jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz)
 {
        struct regcache *reg_cache = state->reg_cache;
        struct native_register *regs_backup;
@@ -204,6 +208,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
        bool is_forward = (s16)op->i.imm >= -1;
        int op_cycles = lightrec_cycles_of_opcode(op->c);
        u32 target_offset, cycles = state->cycles + op_cycles;
+       bool no_indirection = false;
        u32 next_pc;
 
        jit_note(__FILE__, __LINE__);
@@ -221,6 +226,14 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
                /* Unload dead registers before evaluating the branch */
                if (OPT_EARLY_UNLOAD)
                        lightrec_do_early_unload(state, block, offset);
+
+               if (op_flag_local_branch(op->flags) &&
+                   (op_flag_no_ds(op->flags) || !next->opcode) &&
+                   is_forward && !lightrec_has_dirty_regs(reg_cache))
+                       no_indirection = true;
+
+               if (no_indirection)
+                       pr_debug("Using no indirection for branch at offset 0x%hx\n", offset << 2);
        }
 
        if (cycles)
@@ -228,7 +241,8 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
 
        if (!unconditional) {
                /* Generate the branch opcode */
-               addr = jit_new_node_pww(code, NULL, rs, rt);
+               if (!no_indirection)
+                       addr = jit_new_node_pww(code, NULL, rs, rt);
 
                lightrec_free_regs(reg_cache);
                regs_backup = lightrec_regcache_enter_branch(reg_cache);
@@ -257,7 +271,10 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
                        state->nb_local_branches++];
 
                branch->target = target_offset;
-               if (is_forward)
+
+               if (no_indirection)
+                       branch->branch = jit_new_node_pww(code2, NULL, rs, rt);
+               else if (is_forward)
                        branch->branch = jit_b();
                else
                        branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
@@ -270,7 +287,9 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16
        }
 
        if (!unconditional) {
-               jit_patch(addr);
+               if (!no_indirection)
+                       jit_patch(addr);
+
                lightrec_regcache_leave_branch(reg_cache, regs_backup);
 
                if (bz && link) {
@@ -294,9 +313,9 @@ static void rec_BNE(struct lightrec_cstate *state,
        _jit_name(block->_jit, __func__);
 
        if (c.i.rt == 0)
-               rec_b(state, block, offset, jit_code_beqi, 0, false, true);
+               rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true);
        else
-               rec_b(state, block, offset, jit_code_beqr, 0, false, false);
+               rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false);
 }
 
 static void rec_BEQ(struct lightrec_cstate *state,
@@ -307,9 +326,9 @@ static void rec_BEQ(struct lightrec_cstate *state,
        _jit_name(block->_jit, __func__);
 
        if (c.i.rt == 0)
-               rec_b(state, block, offset, jit_code_bnei, 0, c.i.rs == 0, true);
+               rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true);
        else
-               rec_b(state, block, offset, jit_code_bner, 0, c.i.rs == c.i.rt, false);
+               rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false);
 }
 
 static void rec_BLEZ(struct lightrec_cstate *state,
@@ -318,28 +337,28 @@ static void rec_BLEZ(struct lightrec_cstate *state,
        union code c = block->opcode_list[offset].c;
 
        _jit_name(block->_jit, __func__);
-       rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true);
+       rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true);
 }
 
 static void rec_BGTZ(struct lightrec_cstate *state,
                     const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_b(state, block, offset, jit_code_blei, 0, false, true);
+       rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true);
 }
 
 static void rec_regimm_BLTZ(struct lightrec_cstate *state,
                            const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_b(state, block, offset, jit_code_bgei, 0, false, true);
+       rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true);
 }
 
 static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
                              const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_b(state, block, offset, jit_code_bgei,
+       rec_b(state, block, offset, jit_code_bgei, jit_code_blti,
              get_branch_pc(block, offset, 2), false, true);
 }
 
@@ -349,7 +368,7 @@ static void rec_regimm_BGEZ(struct lightrec_cstate *state,
        union code c = block->opcode_list[offset].c;
 
        _jit_name(block->_jit, __func__);
-       rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true);
+       rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true);
 }
 
 static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
@@ -357,7 +376,7 @@ static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
 {
        const struct opcode *op = &block->opcode_list[offset];
        _jit_name(block->_jit, __func__);
-       rec_b(state, block, offset, jit_code_blti,
+       rec_b(state, block, offset, jit_code_blti, jit_code_bgei,
              get_branch_pc(block, offset, 2),
              !op->i.rs, true);
 }
@@ -662,8 +681,8 @@ static void rec_special_or_nor(struct lightrec_cstate *state,
 
        /* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & !Z(rs)) */
        if ((REG_EXT & flags_rs & flags_rt) ||
-           (flags_rt & (REG_EXT | REG_ZEXT) == REG_EXT) ||
-           (flags_rs & (REG_EXT | REG_ZEXT) == REG_EXT))
+           ((flags_rt & (REG_EXT | REG_ZEXT)) == REG_EXT) ||
+           ((flags_rs & (REG_EXT | REG_ZEXT)) == REG_EXT))
                flags_rd |= REG_EXT;
 
        lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
@@ -1034,22 +1053,41 @@ static void rec_special_MTLO(struct lightrec_cstate *state,
        rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
 }
 
-static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block *block,
-                             u32 arg, bool with_arg, enum c_wrappers wrapper)
+static void call_to_c_wrapper(struct lightrec_cstate *state,
+                             const struct block *block, u32 arg,
+                             enum c_wrappers wrapper)
 {
        struct regcache *reg_cache = state->reg_cache;
        jit_state_t *_jit = block->_jit;
-       u8 tmp;
+       s8 tmp, tmp2;
 
-       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
-       jit_ldxi(tmp, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, wrappers_eps[wrapper]));
+       /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */
+       tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
 
-       if (with_arg) {
-               jit_prepare();
-               jit_pushargi(arg);
+       tmp = lightrec_get_reg_with_value(reg_cache,
+                                         (intptr_t) state->state->wrappers_eps[wrapper]);
+       if (tmp < 0) {
+               tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+               jit_ldxi(tmp, LIGHTREC_REG_STATE,
+                        offsetof(struct lightrec_state, wrappers_eps[wrapper]));
+
+               lightrec_temp_set_value(reg_cache, tmp,
+                                       (intptr_t) state->state->wrappers_eps[wrapper]);
        }
 
+       lightrec_free_reg(reg_cache, tmp2);
+
+#ifdef __mips__
+       /* On MIPS, register t9 is always used as the target register for JALR.
+        * Therefore if it does not contain the target address we must
+        * invalidate it. */
+       if (tmp != _T9)
+               lightrec_unload_reg(reg_cache, _jit, _T9);
+#endif
+
+       jit_prepare();
+       jit_pushargi(arg);
+
        lightrec_regcache_mark_live(reg_cache, _jit);
        jit_callr(tmp);
 
@@ -1078,11 +1116,11 @@ static void rec_io(struct lightrec_cstate *state,
                lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
 
        if (is_tagged) {
-               call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW);
+               call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW);
        } else {
                lut_entry = lightrec_get_lut_entry(block);
                call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
-                                 true, C_WRAPPER_RW_GENERIC);
+                                 C_WRAPPER_RW_GENERIC);
        }
 }
 
@@ -1111,8 +1149,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
        bool add_imm = c.i.imm &&
                ((!state->mirrors_mapped && !no_mask) || (invalidate &&
                ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
-       bool need_tmp = !no_mask || addr_offset || add_imm;
-       bool need_tmp2 = addr_offset || invalidate;
+       bool need_tmp = !no_mask || addr_offset || add_imm || invalidate;
 
        rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
@@ -1134,10 +1171,8 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
                addr_reg = tmp;
        }
 
-       if (need_tmp2)
-               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-
        if (addr_offset) {
+               tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
                jit_addi(tmp2, addr_reg, addr_offset);
                addr_reg2 = tmp2;
        } else {
@@ -1161,20 +1196,20 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
                tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
 
                if (c.i.op != OP_SW) {
-                       jit_andi(tmp2, addr_reg, ~3);
-                       addr_reg = tmp2;
+                       jit_andi(tmp, addr_reg, ~3);
+                       addr_reg = tmp;
                }
 
                if (!lut_is_32bit(state)) {
-                       jit_lshi(tmp2, addr_reg, 1);
-                       addr_reg = tmp2;
+                       jit_lshi(tmp, addr_reg, 1);
+                       addr_reg = tmp;
                }
 
                if (addr_reg == rs && c.i.rs == 0) {
                        addr_reg = LIGHTREC_REG_STATE;
                } else {
-                       jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE);
-                       addr_reg = tmp2;
+                       jit_addr(tmp, addr_reg, LIGHTREC_REG_STATE);
+                       addr_reg = tmp;
                }
 
                if (lut_is_32bit(state))
@@ -1185,7 +1220,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate,
                lightrec_free_reg(reg_cache, tmp3);
        }
 
-       if (need_tmp2)
+       if (addr_offset)
                lightrec_free_reg(reg_cache, tmp2);
        if (need_tmp)
                lightrec_free_reg(reg_cache, tmp);
@@ -1217,6 +1252,17 @@ static void rec_store_scratch(struct lightrec_cstate *cstate,
                                0x1fffffff, false);
 }
 
+/*
+ * Recompile a store opcode; used by rec_store() for the
+ * LIGHTREC_IO_DIRECT_HW case.
+ *
+ * Thin wrapper: forwards to rec_store_memory() with the state's offset_io,
+ * the constant 0x1fffffff and a trailing "false".
+ * NOTE(review): those two look like the address mask and the "invalidate"
+ * switch of rec_store_memory() - confirm against its prototype.
+ */
+static void rec_store_io(struct lightrec_cstate *cstate,
+                        const struct block *block, u16 offset,
+                        jit_code_t code, jit_code_t swap_code)
+{
+       _jit_note(block->_jit, __FILE__, __LINE__);
+
+       /* Plain call rather than "return f();": ISO C does not allow a
+        * return statement with an expression in a void function. */
+       rec_store_memory(cstate, block, offset, code, swap_code,
+                        cstate->state->offset_io,
+                        0x1fffffff, false);
+}
+
 static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
                                           const struct block *block,
                                           u16 offset, jit_code_t code,
@@ -1232,7 +1278,6 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
 
        jit_note(__FILE__, __LINE__);
        rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
-       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
        tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
 
        if (state->offset_ram || state->offset_scratch)
@@ -1272,6 +1317,8 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
                lightrec_free_reg(reg_cache, tmp2);
        }
 
+       rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+
        if (is_big_endian() && swap_code && c.i.rt) {
                tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
 
@@ -1390,6 +1437,9 @@ static void rec_store(struct lightrec_cstate *state,
                        rec_store_direct(state, block, offset, code, swap_code);
                }
                break;
+       case LIGHTREC_IO_DIRECT_HW:
+               rec_store_io(state, block, offset, code, swap_code);
+               break;
        default:
                rec_io(state, block, offset, true, false);
                break;
@@ -1527,6 +1577,16 @@ static void rec_load_scratch(struct lightrec_cstate *cstate,
                        cstate->state->offset_scratch, 0x1fffffff);
 }
 
+/*
+ * Recompile a load opcode; used by rec_load() for the
+ * LIGHTREC_IO_DIRECT_HW case.
+ *
+ * Hands the opcode to rec_load_memory() together with the state's offset_io
+ * and the constant 0x1fffffff (presumably the address mask - confirm
+ * against rec_load_memory()).
+ */
+static void rec_load_io(struct lightrec_cstate *cstate,
+                       const struct block *block, u16 offset,
+                       jit_code_t code, jit_code_t swap_code, bool is_unsigned)
+{
+       const uintptr_t io_offset = cstate->state->offset_io;
+       jit_state_t *_jit = block->_jit;
+
+       jit_note(__FILE__, __LINE__);
+
+       rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
+                       io_offset, 0x1fffffff);
+}
+
 static void rec_load_direct(struct lightrec_cstate *cstate,
                            const struct block *block, u16 offset,
                            jit_code_t code, jit_code_t swap_code,
@@ -1652,6 +1712,9 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block,
        case LIGHTREC_IO_SCRATCH:
                rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
                break;
+       case LIGHTREC_IO_DIRECT_HW:
+               rec_load_io(state, block, offset, code, swap_code, is_unsigned);
+               break;
        case LIGHTREC_IO_DIRECT:
                rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
                break;
@@ -1675,8 +1738,10 @@ static void rec_LBU(struct lightrec_cstate *state, const struct block *block, u1
 
 static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
+       jit_code_t code = is_big_endian() ? jit_code_ldxi_us : jit_code_ldxi_s;
+
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false);
+       rec_load(state, block, offset, code, jit_code_bswapr_us, false);
 }
 
 static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
@@ -1699,8 +1764,15 @@ static void rec_LWR(struct lightrec_cstate *state, const struct block *block, u1
 
 static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
 {
+       jit_code_t code;
+
+       if (is_big_endian() && __WORDSIZE == 64)
+               code = jit_code_ldxi_ui;
+       else
+               code = jit_code_ldxi_i;
+
        _jit_name(block->_jit, __func__);
-       rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false);
+       rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
 }
 
 static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
@@ -1710,14 +1782,22 @@ static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u
 }
 
 static void rec_break_syscall(struct lightrec_cstate *state,
-                             const struct block *block, u16 offset, bool is_break)
+                             const struct block *block, u16 offset,
+                             u32 exit_code)
 {
+       struct regcache *reg_cache = state->reg_cache;
+       jit_state_t *_jit = block->_jit;
+       u8 tmp;
+
        _jit_note(block->_jit, __FILE__, __LINE__);
 
-       if (is_break)
-               call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK);
-       else
-               call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL);
+       tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+       jit_movi(tmp, exit_code);
+       jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
+                  LIGHTREC_REG_STATE, tmp);
+
+       lightrec_free_reg(reg_cache, tmp);
 
        /* TODO: the return address should be "pc - 4" if we're a delay slot */
        lightrec_emit_end_of_block(state, block, offset, -1,
@@ -1729,14 +1809,14 @@ static void rec_special_SYSCALL(struct lightrec_cstate *state,
                                const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_break_syscall(state, block, offset, false);
+       rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL);
 }
 
 static void rec_special_BREAK(struct lightrec_cstate *state,
                              const struct block *block, u16 offset)
 {
        _jit_name(block->_jit, __func__);
-       rec_break_syscall(state, block, offset, true);
+       rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK);
 }
 
 static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
@@ -1749,7 +1829,7 @@ static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u1
        lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
        lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
 
-       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC);
+       call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC);
 
        if (c.i.op == OP_CP0 &&
            !op_flag_no_ds(block->opcode_list[offset].flags) &&
@@ -2209,7 +2289,7 @@ static void rec_CP(struct lightrec_cstate *state,
        jit_name(__func__);
        jit_note(__FILE__, __LINE__);
 
-       call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP);
+       call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP);
 }
 
 static void rec_meta_MOV(struct lightrec_cstate *state,
@@ -2260,6 +2340,59 @@ static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state,
        lightrec_free_reg(reg_cache, rt);
 }
 
+/*
+ * Recompile OP_META_MULT2 (signed) and OP_META_MULTU2 (unsigned).
+ *
+ * The opcode's c.r.op field is used as a shift count applied to register
+ * rs: the LO target receives "rs << count" (or 0 once the count reaches 32)
+ * and the HI target receives the bits shifted out of the low 32-bit word.
+ * Either half is skipped when the opcode flags say it is not needed.
+ */
+static void rec_meta_MULT2(struct lightrec_cstate *state,
+                          const struct block *block,
+                          u16 offset)
+{
+       struct regcache *reg_cache = state->reg_cache;
+       union code c = block->opcode_list[offset].c;
+       jit_state_t *_jit = block->_jit;
+       u8 reg_lo = get_mult_div_lo(c);
+       u8 reg_hi = get_mult_div_hi(c);
+       u32 flags = block->opcode_list[offset].flags;
+       bool is_signed = c.i.op == OP_META_MULT2;
+       u8 rs, lo, hi, rflags = 0, hiflags = 0;
+
+       /* HI is produced by a right shift of rs only when the count is below
+        * 32; in that case request rs sign- or zero-extended so the shift
+        * sees well-defined upper bits (presumably for 64-bit hosts). */
+       if (!op_flag_no_hi(flags) && c.r.op < 32) {
+               rflags = is_signed ? REG_EXT : REG_ZEXT;
+               hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT);
+       }
+
+       _jit_name(block->_jit, __func__);
+       jit_note(__FILE__, __LINE__);
+
+       rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags);
+
+       if (!op_flag_no_lo(flags)) {
+               lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
+
+               if (c.r.op < 32)
+                       jit_lshi(lo, rs, c.r.op);
+               else
+                       jit_movi(lo, 0);
+
+               lightrec_free_reg(reg_cache, lo);
+       }
+
+       if (!op_flag_no_hi(flags)) {
+               hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, hiflags);
+
+               if (c.r.op >= 32) {
+                       jit_lshi(hi, rs, c.r.op - 32);
+               } else if (is_signed) {
+                       /* A count of 0 would request a shift by 32, which is
+                        * out of range on hosts with 32-bit registers.
+                        * Shifting the sign-extended value by 31 gives the
+                        * same result: all copies of the sign bit. */
+                       if (c.r.op)
+                               jit_rshi(hi, rs, 32 - c.r.op);
+                       else
+                               jit_rshi(hi, rs, 31);
+               } else {
+                       /* Same guard for the unsigned case: nothing is
+                        * shifted out when the count is 0, so HI is 0. */
+                       if (c.r.op)
+                               jit_rshi_u(hi, rs, 32 - c.r.op);
+                       else
+                               jit_movi(hi, 0);
+               }
+
+               lightrec_free_reg(reg_cache, hi);
+       }
+
+       lightrec_free_reg(reg_cache, rs);
+
+       /* NOTE(review): name/note are emitted a second time here; this looks
+        * redundant with the pair above - confirm before removing. */
+       _jit_name(block->_jit, __func__);
+       jit_note(__FILE__, __LINE__);
+}
+
 static const lightrec_rec_func_t rec_standard[64] = {
        SET_DEFAULT_ELM(rec_standard, unknown_opcode),
        [OP_SPECIAL]            = rec_SPECIAL,
@@ -2298,6 +2431,8 @@ static const lightrec_rec_func_t rec_standard[64] = {
        [OP_META_MOV]           = rec_meta_MOV,
        [OP_META_EXTC]          = rec_meta_EXTC_EXTS,
        [OP_META_EXTS]          = rec_meta_EXTC_EXTS,
+       [OP_META_MULT2]         = rec_meta_MULT2,
+       [OP_META_MULTU2]        = rec_meta_MULT2,
 };
 
 static const lightrec_rec_func_t rec_special[64] = {
index 57986d8..43bea83 100644 (file)
@@ -985,6 +985,33 @@ static u32 int_META_EXTS(struct interpreter *inter)
        return jump_next(inter);
 }
 
+/*
+ * Interpreter handler for OP_META_MULT2 (signed) and OP_META_MULTU2
+ * (unsigned).
+ *
+ * Uses c.r.op as a shift count on register rs: the LO target gets
+ * "rs << count" (0 once the count reaches 32) and the HI target gets the
+ * bits shifted out of the low word. Each half is skipped when the opcode
+ * flags mark it as unused. Returns the value of jump_next().
+ */
+static u32 int_META_MULT2(struct interpreter *inter)
+{
+       u32 *reg_cache = inter->state->regs.gpr;
+       union code c = inter->op->c;
+       u32 rs = reg_cache[c.r.rs];
+       u8 reg_lo = get_mult_div_lo(c);
+       u8 reg_hi = get_mult_div_hi(c);
+
+       if (!op_flag_no_lo(inter->op->flags)) {
+               if (c.r.op < 32)
+                       reg_cache[reg_lo] = rs << c.r.op;
+               else
+                       reg_cache[reg_lo] = 0;
+       }
+
+       if (!op_flag_no_hi(inter->op->flags)) {
+               if (c.r.op >= 32) {
+                       reg_cache[reg_hi] = rs << (c.r.op - 32);
+               } else if (c.i.op == OP_META_MULT2) {
+                       /* Shifting a 32-bit value by 32 is undefined in C,
+                        * so a count of 0 is handled apart: the high word
+                        * is then just the sign of rs. */
+                       if (c.r.op)
+                               reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op);
+                       else
+                               reg_cache[reg_hi] = (s32) rs >> 31;
+               } else {
+                       /* Unsigned: nothing is shifted out for a count of
+                        * 0, so the high word is 0. */
+                       if (c.r.op)
+                               reg_cache[reg_hi] = rs >> (32 - c.r.op);
+                       else
+                               reg_cache[reg_hi] = 0;
+               }
+       }
+
+       return jump_next(inter);
+}
+
 static const lightrec_int_func_t int_standard[64] = {
        SET_DEFAULT_ELM(int_standard, int_unimplemented),
        [OP_SPECIAL]            = int_SPECIAL,
@@ -1023,6 +1050,8 @@ static const lightrec_int_func_t int_standard[64] = {
        [OP_META_MOV]           = int_META_MOV,
        [OP_META_EXTC]          = int_META_EXTC,
        [OP_META_EXTS]          = int_META_EXTS,
+       [OP_META_MULT2]         = int_META_MULT2,
+       [OP_META_MULTU2]        = int_META_MULT2,
 };
 
 static const lightrec_int_func_t int_special[64] = {
@@ -1155,5 +1184,7 @@ u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u3
 
        pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
 
+       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+
        return 0;
 }
index 4eedef2..1b120db 100644 (file)
@@ -10,6 +10,7 @@
 #include "lightrec-config.h"
 #include "disassembler.h"
 #include "lightrec.h"
+#include "regcache.h"
 
 #if ENABLE_THREADED_COMPILER
 #include <stdatomic.h>
 
 #define fallthrough do {} while (0) /* fall-through */
 
+/*
+ * Get the address of the "type" structure that embeds "member", given a
+ * pointer to that member. The byte arithmetic goes through "char *" because
+ * arithmetic on "void *" is a GNU extension that other compilers reject.
+ */
+#define container_of(ptr, type, member) \
+       ((type *)((char *)(ptr) - offsetof(type, member)))
+
+/*
+ * popcount32(x): number of bits set in a 32-bit value.
+ * ffs32(x): zero-based index of the least significant set bit of x, or -1
+ * when x is 0. Both compiler variants must agree on this; counting leading
+ * zeroes would give the index of the most significant set bit instead.
+ */
+#ifdef _MSC_BUILD
+#      define popcount32(x)    __popcnt(x)
+static inline int ffs32(unsigned int x)
+{
+       unsigned long idx;
+
+       /* _BitScanForward() returns 0 when no bit is set. */
+       return _BitScanForward(&idx, x) ? (int)idx : -1;
+}
+#else
+#      define popcount32(x)    __builtin_popcount(x)
+#      define ffs32(x)         (__builtin_ffs(x) - 1)
+#endif
+
 /* Flags for (struct block *)->flags */
 #define BLOCK_NEVER_COMPILE    BIT(0)
 #define BLOCK_SHOULD_RECOMPILE BIT(1)
 #define BLOCK_FULLY_TAGGED     BIT(2)
 #define BLOCK_IS_DEAD          BIT(3)
 #define BLOCK_IS_MEMSET                BIT(4)
+#define BLOCK_NO_OPCODE_LIST   BIT(5)
 
 #define RAM_SIZE       0x200000
 #define BIOS_SIZE      0x80000
@@ -90,9 +103,10 @@ struct block {
        u32 precompile_date;
        unsigned int code_size;
        u16 nb_ops;
-       u8 flags;
 #if ENABLE_THREADED_COMPILER
-       atomic_flag op_list_freed;
+       _Atomic u8 flags;
+#else
+       u8 flags;
 #endif
 };
 
@@ -111,18 +125,14 @@ enum c_wrappers {
        C_WRAPPER_RW_GENERIC,
        C_WRAPPER_MTC,
        C_WRAPPER_CP,
-       C_WRAPPER_SYSCALL,
-       C_WRAPPER_BREAK,
        C_WRAPPERS_COUNT,
 };
 
 struct lightrec_cstate {
        struct lightrec_state *state;
 
-       struct jit_node *branches[512];
        struct lightrec_branch local_branches[512];
        struct lightrec_branch_target targets[512];
-       unsigned int nb_branches;
        unsigned int nb_local_branches;
        unsigned int nb_targets;
        unsigned int cycles;
@@ -132,6 +142,7 @@ struct lightrec_cstate {
 
 struct lightrec_state {
        struct lightrec_registers regs;
+       uintptr_t wrapper_regs[NUM_TEMPS];
        u32 next_pc;
        u32 current_cycle;
        u32 target_cycle;
@@ -152,7 +163,7 @@ struct lightrec_state {
        unsigned int nb_precompile;
        unsigned int nb_maps;
        const struct lightrec_mem_map *maps;
-       uintptr_t offset_ram, offset_bios, offset_scratch;
+       uintptr_t offset_ram, offset_bios, offset_scratch, offset_io;
        _Bool with_32bit_lut;
        _Bool mirrors_mapped;
        _Bool invalidate_from_dma_only;
@@ -262,7 +273,8 @@ void lightrec_free_cstate(struct lightrec_cstate *cstate);
 union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
 
 int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block);
-void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
+void lightrec_free_opcode_list(struct lightrec_state *state,
+                              struct opcode *list);
 
 unsigned int lightrec_cycles_of_opcode(union code code);
 
@@ -281,4 +293,41 @@ static inline s16 s16_max(s16 a, s16 b)
        return a > b ? a : b;
 }
 
+/*
+ * Tell whether at least one bit of "flag" is set in block->flags.
+ * With the threaded compiler enabled the field is read with a relaxed
+ * atomic load; otherwise it is read directly.
+ */
+static inline _Bool block_has_flag(struct block *block, u8 flag)
+{
+       u8 current;
+
+#if ENABLE_THREADED_COMPILER
+       current = atomic_load_explicit(&block->flags, memory_order_relaxed);
+#else
+       current = block->flags;
+#endif
+
+       return (current & flag) != 0;
+}
+
+/*
+ * Set the bits of "mask" in block->flags and return the flags as they were
+ * before the update. With the threaded compiler enabled this is a single
+ * relaxed atomic fetch-or; otherwise a plain read-modify-write.
+ */
+static inline u8 block_set_flags(struct block *block, u8 mask)
+{
+       u8 previous;
+
+#if ENABLE_THREADED_COMPILER
+       previous = atomic_fetch_or_explicit(&block->flags, mask,
+                                           memory_order_relaxed);
+#else
+       previous = block->flags;
+       block->flags = previous | mask;
+#endif
+
+       return previous;
+}
+
+/*
+ * Clear the bits of "mask" in block->flags and return the flags as they
+ * were before the update. With the threaded compiler enabled this is a
+ * single relaxed atomic fetch-and; otherwise a plain read-modify-write.
+ */
+static inline u8 block_clear_flags(struct block *block, u8 mask)
+{
+       u8 previous;
+
+#if ENABLE_THREADED_COMPILER
+       previous = atomic_fetch_and_explicit(&block->flags, ~mask,
+                                            memory_order_relaxed);
+#else
+       previous = block->flags;
+       block->flags = previous & ~mask;
+#endif
+
+       return previous;
+}
+
 #endif /* __LIGHTREC_PRIVATE_H__ */
index ffa40f0..497cc68 100644 (file)
@@ -342,12 +342,14 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
        struct opcode *op;
        bool was_tagged;
        u16 offset = (u16)arg;
+       u16 old_flags;
 
        block = lightrec_find_block_from_lut(state->block_cache,
                                             arg >> 16, state->next_pc);
        if (unlikely(!block)) {
                pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
                         state->next_pc, offset);
+               lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
                return;
        }
 
@@ -357,11 +359,14 @@ static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg)
        lightrec_rw_helper(state, op->c, &op->flags, block);
 
        if (!was_tagged) {
-               pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
-                        "for recompilation\n", block->pc);
+               old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE);
 
-               block->flags |= BLOCK_SHOULD_RECOMPILE;
-               lut_write(state, lut_offset(block->pc), NULL);
+               if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) {
+                       pr_debug("Opcode of block at PC 0x%08x has been tagged"
+                                " - flag for recompilation\n", block->pc);
+
+                       lut_write(state, lut_offset(block->pc), NULL);
+               }
        }
 }
 
@@ -418,12 +423,27 @@ static u32 lightrec_mfc2(struct lightrec_state *state, u8 reg)
 
 u32 lightrec_mfc(struct lightrec_state *state, union code op)
 {
+       u32 val;
+
        if (op.i.op == OP_CP0)
                return state->regs.cp0[op.r.rd];
        else if (op.r.rs == OP_CP2_BASIC_MFC2)
                return lightrec_mfc2(state, op.r.rd);
-       else
-               return state->regs.cp2c[op.r.rd];
+
+       val = state->regs.cp2c[op.r.rd];
+
+       switch (op.r.rd) {
+       case 4:
+       case 12:
+       case 20:
+       case 26:
+       case 27:
+       case 29:
+       case 30:
+               return (u32)(s16)val;
+       default:
+               return val;
+       }
 }
 
 static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
@@ -586,31 +606,26 @@ static void lightrec_cp_cb(struct lightrec_state *state, u32 arg)
        lightrec_cp(state, (union code) arg);
 }
 
-static void lightrec_syscall_cb(struct lightrec_state *state)
-{
-       lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
-}
-
-static void lightrec_break_cb(struct lightrec_state *state)
-{
-       lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
-}
-
 static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
 {
        struct block *block = lightrec_find_block(state->block_cache, pc);
+       u8 old_flags;
 
        if (block && lightrec_block_is_outdated(state, block)) {
                pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
 
-               /* Make sure the recompiler isn't processing the block we'll
-                * destroy */
-               if (ENABLE_THREADED_COMPILER)
-                       lightrec_recompiler_remove(state->rec, block);
+               old_flags = block_set_flags(block, BLOCK_IS_DEAD);
+               if (!(old_flags & BLOCK_IS_DEAD)) {
+                       /* Make sure the recompiler isn't processing the block
+                        * we'll destroy */
+                       if (ENABLE_THREADED_COMPILER)
+                               lightrec_recompiler_remove(state->rec, block);
+
+                       lightrec_unregister_block(state->block_cache, block);
+                       remove_from_code_lut(state->block_cache, block);
+                       lightrec_free_block(state, block);
+               }
 
-               lightrec_unregister_block(state->block_cache, block);
-               remove_from_code_lut(state->block_cache, block);
-               lightrec_free_block(state, block);
                block = NULL;
        }
 
@@ -645,19 +660,18 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
                if (unlikely(!block))
                        break;
 
-               if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
+               if (OPT_REPLACE_MEMSET &&
+                   block_has_flag(block, BLOCK_IS_MEMSET)) {
                        func = state->memset_func;
                        break;
                }
 
-               should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
-                       !(block->flags & BLOCK_IS_DEAD);
+               should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) &&
+                       !block_has_flag(block, BLOCK_IS_DEAD);
 
                if (unlikely(should_recompile)) {
                        pr_debug("Block at PC 0x%08x should recompile\n", pc);
 
-                       lightrec_unregister(MEM_FOR_CODE, block->code_size);
-
                        if (ENABLE_THREADED_COMPILER) {
                                lightrec_recompiler_add(state->rec, block);
                        } else {
@@ -677,12 +691,12 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
                if (likely(func))
                        break;
 
-               if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) {
+               if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) {
                        pc = lightrec_emulate_block(state, block, pc);
 
                } else if (!ENABLE_THREADED_COMPILER) {
                        /* Block wasn't compiled yet - run the interpreter */
-                       if (block->flags & BLOCK_FULLY_TAGGED)
+                       if (block_has_flag(block, BLOCK_FULLY_TAGGED))
                                pr_debug("Block fully tagged, skipping first pass\n");
                        else if (ENABLE_FIRST_PASS && likely(!should_recompile))
                                pc = lightrec_emulate_block(state, block, pc);
@@ -693,6 +707,15 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
                                state->exit_flags = LIGHTREC_EXIT_NOMEM;
                                return NULL;
                        }
+               } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) {
+                       /*
+                        * If the block is dead but has never been compiled,
+                        * then its function pointer is NULL and we cannot
+                        * execute the block. In that case, reap all the dead
+                        * blocks now, and in the next loop we will create a
+                        * new block.
+                        */
+                       lightrec_reaper_reap(state->reaper);
                } else {
                        lightrec_recompiler_add(state->rec, block);
                }
@@ -706,16 +729,6 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc)
        return func;
 }
 
-static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
-                             void (*f)(struct lightrec_state *, u32), u32 arg)
-{
-       state->current_cycle = state->target_cycle - cycles_delta;
-
-       (*f)(state, arg);
-
-       return state->target_cycle - state->current_cycle;
-}
-
 static void * lightrec_alloc_code(struct lightrec_state *state, size_t size)
 {
        void *code;
@@ -821,9 +834,8 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        struct block *block;
        jit_state_t *_jit;
        unsigned int i;
-       int stack_ptr;
-       jit_node_t *to_tramp, *to_fn_epilog;
        jit_node_t *addr[C_WRAPPERS_COUNT - 1];
+       jit_node_t *to_end[C_WRAPPERS_COUNT - 1];
 
        block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
        if (!block)
@@ -840,67 +852,69 @@ static struct block * generate_wrapper(struct lightrec_state *state)
        jit_prolog();
        jit_tramp(256);
 
-       /* Add entry points; separate them by opcodes that increment
-        * LIGHTREC_REG_STATE (since we cannot touch other registers).
-        * The difference will then tell us which C function to call. */
+       /* Add entry points */
        for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
-               jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8);
+               jit_ldxi(JIT_R1, LIGHTREC_REG_STATE,
+                        offsetof(struct lightrec_state, c_wrappers[i]));
+               to_end[i - 1] = jit_b();
                addr[i - 1] = jit_indirect();
        }
 
+       jit_ldxi(JIT_R1, LIGHTREC_REG_STATE,
+                offsetof(struct lightrec_state, c_wrappers[0]));
+
+       for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
+               jit_patch(to_end[i]);
+
        jit_epilog();
        jit_prolog();
 
-       stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
-
        /* Save all temporaries on stack */
-       for (i = 0; i < NUM_TEMPS; i++)
-               jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
+       for (i = 0; i < NUM_TEMPS; i++) {
+               if (i + FIRST_TEMP != 1) {
+                       jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]),
+                                LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP));
+               }
+       }
 
-       jit_getarg(JIT_R1, jit_arg());
+       jit_getarg(JIT_R2, jit_arg());
 
-       /* Jump to the trampoline */
-       to_tramp = jit_jmpi();
+       jit_prepare();
+       jit_pushargr(LIGHTREC_REG_STATE);
+       jit_pushargr(JIT_R2);
 
-       /* The trampoline will jump back here */
-       to_fn_epilog = jit_label();
+       jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE,
+                   offsetof(struct lightrec_state, target_cycle));
 
-       /* Restore temporaries from stack */
-       for (i = 0; i < NUM_TEMPS; i++)
-               jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
+       /* state->current_cycle = state->target_cycle - delta; */
+       jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE);
+       jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
+                  LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE);
 
-       jit_ret();
-       jit_epilog();
+       /* Call the wrapper function */
+       jit_finishr(JIT_R1);
 
-       /* Trampoline entry point.
-        * The sole purpose of the trampoline is to cheese Lightning not to
-        * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we
-        * do want to return to the caller with this register modified. */
-       jit_prolog();
-       jit_tramp(256);
-       jit_patch(to_tramp);
-
-       /* Retrieve the wrapper function */
-       jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
-                offsetof(struct lightrec_state, c_wrappers));
-
-       /* Restore LIGHTREC_REG_STATE to its correct value */
-       jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state);
+       /* delta = state->target_cycle - state->current_cycle */;
+       jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE,
+                   offsetof(struct lightrec_state, current_cycle));
+       jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE,
+                   offsetof(struct lightrec_state, target_cycle));
+       jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE);
 
-       jit_prepare();
-       jit_pushargr(LIGHTREC_REG_STATE);
-       jit_pushargr(LIGHTREC_REG_CYCLE);
-       jit_pushargr(JIT_R0);
-       jit_pushargr(JIT_R1);
-       jit_finishi(c_function_wrapper);
-       jit_retval_i(LIGHTREC_REG_CYCLE);
+       /* Restore temporaries from state->wrapper_regs */
+       for (i = 0; i < NUM_TEMPS; i++) {
+               if (i + FIRST_TEMP != 1) {
+                       jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE,
+                                offsetof(struct lightrec_state, wrapper_regs[i]));
+               }
+       }
 
-       jit_patch_at(jit_jmpi(), to_fn_epilog);
+       jit_ret();
        jit_epilog();
 
        block->_jit = _jit;
        block->opcode_list = NULL;
-       block->flags = 0;
+       block->flags = BLOCK_NO_OPCODE_LIST;
        block->nb_ops = 0;
 
        block->function = lightrec_emit_code(state, block, _jit,
@@ -974,12 +988,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        jit_prolog();
        jit_frame(256);
 
-       jit_getarg(JIT_R0, jit_arg());
+       jit_getarg(JIT_V1, jit_arg());
        jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
 
        /* Force all callee-saved registers to be pushed on the stack */
        for (i = 0; i < NUM_REGS; i++)
-               jit_movr(JIT_V(i), JIT_V(i));
+               jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG));
 
        /* Pass lightrec_state structure to blocks, using the last callee-saved
         * register that Lightning provides */
@@ -988,13 +1002,15 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        loop = jit_label();
 
        /* Call the block's code */
-       jit_jmpr(JIT_R0);
+       jit_jmpr(JIT_V1);
 
        if (OPT_REPLACE_MEMSET) {
                /* Blocks will jump here when they need to call
                 * lightrec_memset() */
                addr3 = jit_indirect();
 
+               jit_movr(JIT_V1, LIGHTREC_REG_CYCLE);
+
                jit_prepare();
                jit_pushargr(LIGHTREC_REG_STATE);
                jit_finishi(lightrec_memset);
@@ -1002,8 +1018,8 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
                jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
                            offsetof(struct lightrec_state, regs.gpr[31]));
 
-               jit_retval(JIT_R0);
-               jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
+               jit_retval(LIGHTREC_REG_CYCLE);
+               jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE);
        }
 
        /* The block will jump here, with the number of cycles remaining in
@@ -1018,25 +1034,30 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
        to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
 
        /* Convert next PC to KUNSEG and avoid mirrors */
-       jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
-       jit_rshi_u(JIT_R1, JIT_R0, 28);
+       jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
+       jit_rshi_u(JIT_R1, JIT_V1, 28);
        jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
        jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
-       jit_movnr(JIT_R0, JIT_R2, JIT_R1);
+       jit_movnr(JIT_V1, JIT_R2, JIT_R1);
 
        /* If possible, use the code LUT */
        if (!lut_is_32bit(state))
-               jit_lshi(JIT_R0, JIT_R0, 1);
-       jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
+               jit_lshi(JIT_V1, JIT_V1, 1);
+       jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE);
 
        offset = offsetof(struct lightrec_state, code_lut);
        if (lut_is_32bit(state))
-               jit_ldxi_ui(JIT_R0, JIT_R0, offset);
+               jit_ldxi_ui(JIT_V1, JIT_V1, offset);
        else
-               jit_ldxi(JIT_R0, JIT_R0, offset);
+               jit_ldxi(JIT_V1, JIT_V1, offset);
 
        /* If we get non-NULL, loop */
-       jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+       jit_patch_at(jit_bnei(JIT_V1, 0), loop);
+
+       /* The code LUT will be set to this address when the block at the target
+        * PC has been preprocessed but not yet compiled by the threaded
+        * recompiler */
+       addr = jit_indirect();
 
        /* Slow path: call C function get_next_block_func() */
 
@@ -1044,22 +1065,22 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
                /* We may call the interpreter - update state->current_cycle */
                jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
                           offsetof(struct lightrec_state, target_cycle));
-               jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
+               jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE);
                jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
-                          LIGHTREC_REG_STATE, JIT_R1);
+                          LIGHTREC_REG_STATE, JIT_V1);
        }
 
-       /* The code LUT will be set to this address when the block at the target
-        * PC has been preprocessed but not yet compiled by the threaded
-        * recompiler */
-       addr = jit_indirect();
-
-       /* Get the next block */
        jit_prepare();
        jit_pushargr(LIGHTREC_REG_STATE);
        jit_pushargr(JIT_V0);
+
+       /* Save the cycles register if needed */
+       if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES))
+               jit_movr(JIT_V0, LIGHTREC_REG_CYCLE);
+
+       /* Get the next block */
        jit_finishi(&get_next_block_func);
-       jit_retval(JIT_R0);
+       jit_retval(JIT_V1);
 
        if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
                /* The interpreter may have updated state->current_cycle and
@@ -1069,10 +1090,12 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
                jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
                           offsetof(struct lightrec_state, target_cycle));
                jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
+       } else {
+               jit_movr(LIGHTREC_REG_CYCLE, JIT_V0);
        }
 
        /* If we get non-NULL, loop */
-       jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+       jit_patch_at(jit_bnei(JIT_V1, 0), loop);
 
        /* When exiting, the recompiled code will jump to that address */
        jit_note(__FILE__, __LINE__);
@@ -1083,7 +1106,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state)
 
        block->_jit = _jit;
        block->opcode_list = NULL;
-       block->flags = 0;
+       block->flags = BLOCK_NO_OPCODE_LIST;
        block->nb_ops = 0;
 
        block->function = lightrec_emit_code(state, block, _jit,
@@ -1127,11 +1150,13 @@ unsigned int lightrec_cycles_of_opcode(union code code)
        return 2;
 }
 
-void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
+void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops) /* Free the opcode_list that embeds ops. */
 {
+       struct opcode_list *list = container_of(ops, struct opcode_list, ops);
+       /* ops is the 'ops' member of a struct opcode_list: release the header and the opcodes together. */
        lightrec_free(state, MEM_FOR_IR,
-                     sizeof(*block->opcode_list) * block->nb_ops,
-                     block->opcode_list);
+                     sizeof(*list) + list->nb_ops * sizeof(struct opcode), /* same size formula as the allocation in lightrec_disassemble() */
+                     list);
 }
 
 static unsigned int lightrec_get_mips_block_len(const u32 *src)
@@ -1153,25 +1178,28 @@ static unsigned int lightrec_get_mips_block_len(const u32 *src)
 static struct opcode * lightrec_disassemble(struct lightrec_state *state,
                                             const u32 *src, unsigned int *len)
 {
-       struct opcode *list;
+       struct opcode_list *list; /* carries nb_ops plus the ops[] array filled below */
        unsigned int i, length;
 
        length = lightrec_get_mips_block_len(src);
 
-       list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length);
+       list = lightrec_malloc(state, MEM_FOR_IR,
+                              sizeof(*list) + sizeof(struct opcode) * length); /* list header plus one struct opcode per source word */
        if (!list) {
                pr_err("Unable to allocate memory\n");
                return NULL;
        }
 
+       list->nb_ops = (u16) length; /* NOTE(review): length is narrowed to 16 bits here - confirm a block can never exceed 65535 opcodes */
+
        for (i = 0; i < length; i++) {
-               list[i].opcode = LE32TOH(src[i]);
-               list[i].flags = 0;
+               list->ops[i].opcode = LE32TOH(src[i]);
+               list->ops[i].flags = 0;
        }
 
        *len = length * sizeof(u32);
 
-       return list;
+       return list->ops; /* callers receive the ops array; lightrec_free_opcode_list() gets back to the list with container_of() */
 }
 
 static struct block * lightrec_precompile_block(struct lightrec_state *state,
@@ -1179,11 +1207,12 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
 {
        struct opcode *list;
        struct block *block;
-       void *host;
+       void *host, *addr;
        const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
        const u32 *code = (u32 *) host;
        unsigned int length;
        bool fully_tagged;
+       u8 block_flags = 0;
 
        if (!map)
                return NULL;
@@ -1209,9 +1238,6 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
        block->flags = 0;
        block->code_size = 0;
        block->precompile_date = state->current_cycle;
-#if ENABLE_THREADED_COMPILER
-       block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
-#endif
        block->nb_ops = length / sizeof(u32);
 
        lightrec_optimize(state, block);
@@ -1230,17 +1256,23 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state,
        /* If the first opcode is an 'impossible' branch, never compile the
         * block */
        if (should_emulate(block->opcode_list))
-               block->flags |= BLOCK_NEVER_COMPILE;
+               block_flags |= BLOCK_NEVER_COMPILE;
 
        fully_tagged = lightrec_block_is_fully_tagged(block);
        if (fully_tagged)
-               block->flags |= BLOCK_FULLY_TAGGED;
+               block_flags |= BLOCK_FULLY_TAGGED;
 
-       if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
-               lut_write(state, lut_offset(pc), state->memset_func);
+       if (block_flags)
+               block_set_flags(block, block_flags);
 
        block->hash = lightrec_calculate_block_hash(block);
 
+       if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET))
+               addr = state->memset_func;
+       else
+               addr = state->get_next_block;
+       lut_write(state, lut_offset(pc), addr);
+
        pr_debug("Recompile count: %u\n", state->nb_precompile++);
 
        return block;
@@ -1310,24 +1342,31 @@ static void lightrec_reap_function(struct lightrec_state *state, void *data)
        lightrec_free_function(state, data);
 }
 
+static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data)
+{ /* Callback handed to lightrec_reaper_add() by lightrec_compile_block(); data is the block's opcode array. */
+       lightrec_free_opcode_list(state, data); /* void * converts implicitly to the struct opcode * parameter */
+}
+
 int lightrec_compile_block(struct lightrec_cstate *cstate,
                           struct block *block)
 {
        struct lightrec_state *state = cstate->state;
        struct lightrec_branch_target *target;
-       bool op_list_freed = false, fully_tagged = false;
+       bool fully_tagged = false;
        struct block *block2;
        struct opcode *elm;
        jit_state_t *_jit, *oldjit;
        jit_node_t *start_of_block;
        bool skip_next = false;
        void *old_fn, *new_fn;
+       size_t old_code_size;
        unsigned int i, j;
+       u8 old_flags;
        u32 offset;
 
        fully_tagged = lightrec_block_is_fully_tagged(block);
        if (fully_tagged)
-               block->flags |= BLOCK_FULLY_TAGGED;
+               block_set_flags(block, BLOCK_FULLY_TAGGED);
 
        _jit = jit_new_state();
        if (!_jit)
@@ -1335,11 +1374,11 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
 
        oldjit = block->_jit;
        old_fn = block->function;
+       old_code_size = block->code_size;
        block->_jit = _jit;
 
        lightrec_regcache_reset(cstate->reg_cache);
        cstate->cycles = 0;
-       cstate->nb_branches = 0;
        cstate->nb_local_branches = 0;
        cstate->nb_targets = 0;
 
@@ -1377,9 +1416,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                cstate->cycles += lightrec_cycles_of_opcode(elm->c);
        }
 
-       for (i = 0; i < cstate->nb_branches; i++)
-               jit_patch(cstate->branches[i]);
-
        for (i = 0; i < cstate->nb_local_branches; i++) {
                struct lightrec_branch *branch = &cstate->local_branches[i];
 
@@ -1403,7 +1439,6 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                        pr_err("Unable to find branch target\n");
        }
 
-       jit_patch_abs(jit_jmpi(), state->eob_wrapper_func);
        jit_ret();
        jit_epilog();
 
@@ -1412,22 +1447,24 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                if (!ENABLE_THREADED_COMPILER)
                        pr_err("Unable to compile block!\n");
                block->_jit = oldjit;
+               jit_clear_state();
                _jit_destroy_state(_jit);
                return -ENOMEM;
        }
 
+       /* Pause the reaper, because lightrec_reset_lut_offset() may try to set
+        * the old block->function pointer to the code LUT. */
+       if (ENABLE_THREADED_COMPILER)
+               lightrec_reaper_pause(state->reaper);
+
        block->function = new_fn;
-       block->flags &= ~BLOCK_SHOULD_RECOMPILE;
+       block_clear_flags(block, BLOCK_SHOULD_RECOMPILE);
 
        /* Add compiled function to the LUT */
        lut_write(state, lut_offset(block->pc), block->function);
 
-       if (ENABLE_THREADED_COMPILER) {
-               /* Since we might try to reap the same block multiple times,
-                * we need the reaper to wait until everything has been
-                * submitted, so that the duplicate entries can be dropped. */
-               lightrec_reaper_pause(state->reaper);
-       }
+       if (ENABLE_THREADED_COMPILER)
+               lightrec_reaper_continue(state->reaper);
 
        /* Detect old blocks that have been covered by the new one */
        for (i = 0; i < cstate->nb_targets; i++) {
@@ -1437,6 +1474,13 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                        continue;
 
                offset = block->pc + target->offset * sizeof(u32);
+
+               /* Pause the reaper while we search for the block until we set
+                * the BLOCK_IS_DEAD flag, otherwise the block may be removed
+                * under our feet. */
+               if (ENABLE_THREADED_COMPILER)
+                       lightrec_reaper_pause(state->reaper);
+
                block2 = lightrec_find_block(state->block_cache, offset);
                if (block2) {
                        /* No need to check if block2 is compilable - it must
@@ -1444,12 +1488,16 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
 
                        /* Set the "block dead" flag to prevent the dynarec from
                         * recompiling this block */
-                       block2->flags |= BLOCK_IS_DEAD;
+                       old_flags = block_set_flags(block2, BLOCK_IS_DEAD);
+               }
+
+               if (ENABLE_THREADED_COMPILER) {
+                       lightrec_reaper_continue(state->reaper);
 
                        /* If block2 was pending for compilation, cancel it.
                         * If it's being compiled right now, wait until it
                         * finishes. */
-                       if (ENABLE_THREADED_COMPILER)
+                       if (block2)
                                lightrec_recompiler_remove(state->rec, block2);
                }
 
@@ -1464,20 +1512,17 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                                 "0x%08x\n", block2->pc, block->pc);
 
                        /* Finally, reap the block. */
-                       if (ENABLE_THREADED_COMPILER) {
+                       if (!ENABLE_THREADED_COMPILER) {
+                               lightrec_unregister_block(state->block_cache, block2);
+                               lightrec_free_block(state, block2);
+                       } else if (!(old_flags & BLOCK_IS_DEAD)) {
                                lightrec_reaper_add(state->reaper,
                                                    lightrec_reap_block,
                                                    block2);
-                       } else {
-                               lightrec_unregister_block(state->block_cache, block2);
-                               lightrec_free_block(state, block2);
                        }
                }
        }
 
-       if (ENABLE_THREADED_COMPILER)
-               lightrec_reaper_continue(state->reaper);
-
        if (ENABLE_DISASSEMBLER) {
                pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
                jit_disassemble();
@@ -1485,15 +1530,20 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
 
        jit_clear_state();
 
-#if ENABLE_THREADED_COMPILER
        if (fully_tagged)
-               op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
-#endif
-       if (fully_tagged && !op_list_freed) {
+               old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
+
+       if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) {
                pr_debug("Block PC 0x%08x is fully tagged"
                         " - free opcode list\n", block->pc);
-               lightrec_free_opcode_list(state, block);
-               block->opcode_list = NULL;
+
+               if (ENABLE_THREADED_COMPILER) {
+                       lightrec_reaper_add(state->reaper,
+                                           lightrec_reap_opcode_list,
+                                           block->opcode_list);
+               } else {
+                       lightrec_free_opcode_list(state, block->opcode_list);
+               }
        }
 
        if (oldjit) {
@@ -1509,6 +1559,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate,
                        _jit_destroy_state(oldjit);
                        lightrec_free_function(state, old_fn);
                }
+
+               lightrec_unregister(MEM_FOR_CODE, old_code_size);
        }
 
        return 0;
@@ -1561,20 +1613,24 @@ u32 lightrec_execute(struct lightrec_state *state, u32 pc, u32 target_cycle)
        return state->next_pc;
 }
 
-u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
+u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc,
+                            u32 target_cycle)
 {
-       return lightrec_execute(state, pc, state->current_cycle);
-}
-
-u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
-{
-       struct block *block = lightrec_get_block(state, pc);
-       if (!block)
-               return 0;
+       struct block *block;
 
        state->exit_flags = LIGHTREC_EXIT_NORMAL;
+       state->target_cycle = target_cycle;
+
+       do {
+               block = lightrec_get_block(state, pc);
+               if (!block)
+                       break;
+
+               pc = lightrec_emulate_block(state, block, pc);
 
-       pc = lightrec_emulate_block(state, block, pc);
+               if (ENABLE_THREADED_COMPILER)
+                       lightrec_reaper_reap(state->reaper);
+       } while (state->current_cycle < state->target_cycle);
 
        if (LOG_LEVEL >= INFO_L)
                lightrec_print_info(state);
@@ -1584,9 +1640,13 @@ u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
 
 void lightrec_free_block(struct lightrec_state *state, struct block *block)
 {
+       u8 old_flags;
+
        lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
-       if (block->opcode_list)
-               lightrec_free_opcode_list(state, block);
+       old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
+
+       if (!(old_flags & BLOCK_NO_OPCODE_LIST))
+               lightrec_free_opcode_list(state, block->opcode_list);
        if (block->_jit)
                _jit_destroy_state(block->_jit);
        if (block->function) {
@@ -1705,8 +1765,6 @@ struct lightrec_state * lightrec_init(char *argv0,
        state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
        state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
        state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;
-       state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
-       state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
 
        map = &state->maps[PSX_MAP_BIOS];
        state->offset_bios = (uintptr_t)map->address - map->pc;
@@ -1714,6 +1772,9 @@ struct lightrec_state * lightrec_init(char *argv0,
        map = &state->maps[PSX_MAP_SCRATCH_PAD];
        state->offset_scratch = (uintptr_t)map->address - map->pc;
 
+       map = &state->maps[PSX_MAP_HW_REGISTERS];
+       state->offset_io = (uintptr_t)map->address - map->pc;
+
        map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
        state->offset_ram = (uintptr_t)map->address - map->pc;
 
@@ -1725,6 +1786,7 @@ struct lightrec_state * lightrec_init(char *argv0,
        if (state->offset_bios == 0 &&
            state->offset_scratch == 0 &&
            state->offset_ram == 0 &&
+           state->offset_io == 0 &&
            state->mirrors_mapped) {
                pr_info("Memory map is perfect. Emitted code will be best.\n");
        } else {
index 3ea8e65..9613da3 100644 (file)
@@ -87,6 +87,7 @@ struct lightrec_mem_map {
 struct lightrec_ops {
        void (*cop2_op)(struct lightrec_state *state, u32 op);
        void (*enable_ram)(struct lightrec_state *state, _Bool enable);
+       _Bool (*hw_direct)(u32 kaddr, _Bool is_write, u8 size); /* NOTE(review): presumably tells whether an access of 'size' at 'kaddr' may be done directly - confirm against the implementation */
 };
 
 struct lightrec_registers {
@@ -105,8 +106,8 @@ __api void lightrec_destroy(struct lightrec_state *state);
 
 __api u32 lightrec_execute(struct lightrec_state *state,
                           u32 pc, u32 target_cycle);
-__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc);
-__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc);
+__api u32 lightrec_run_interpreter(struct lightrec_state *state,
+                                  u32 pc, u32 target_cycle);
 
 __api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len);
 __api void lightrec_invalidate_all(struct lightrec_state *state);
index 8da84ee..2eba60e 100644 (file)
@@ -119,11 +119,31 @@ static u64 opcode_read_mask(union code op)
        }
 }
 
-static u64 opcode_write_mask(union code op)
+static u64 mult_div_write_mask(union code op) /* Bitmask of the registers flagged as written by a mult/div opcode. */
 {
        u64 flags;
 
+       if (!OPT_FLAG_MULT_DIV) /* option disabled: LO and HI are always reported */
+               return BIT(REG_LO) | BIT(REG_HI);
+
+       if (op.r.rd) /* a non-zero rd field is reported instead of LO */
+               flags = BIT(op.r.rd);
+       else
+               flags = BIT(REG_LO);
+       if (op.r.imm) /* a non-zero imm field is reported instead of HI */
+               flags |= BIT(op.r.imm);
+       else
+               flags |= BIT(REG_HI);
+
+       return flags;
+}
+
+static u64 opcode_write_mask(union code op)
+{
        switch (op.i.op) {
+       case OP_META_MULT2:
+       case OP_META_MULTU2:
+               return mult_div_write_mask(op);
        case OP_SPECIAL:
                switch (op.r.op) {
                case OP_SPECIAL_JR:
@@ -134,18 +154,7 @@ static u64 opcode_write_mask(union code op)
                case OP_SPECIAL_MULTU:
                case OP_SPECIAL_DIV:
                case OP_SPECIAL_DIVU:
-                       if (!OPT_FLAG_MULT_DIV)
-                               return BIT(REG_LO) | BIT(REG_HI);
-
-                       if (op.r.rd)
-                               flags = BIT(op.r.rd);
-                       else
-                               flags = BIT(REG_LO);
-                       if (op.r.imm)
-                               flags |= BIT(op.r.imm);
-                       else
-                               flags |= BIT(REG_HI);
-                       return flags;
+                       return mult_div_write_mask(op);
                case OP_SPECIAL_MTHI:
                        return BIT(REG_HI);
                case OP_SPECIAL_MTLO:
@@ -361,6 +370,22 @@ static bool opcode_is_store(union code op)
        }
 }
 
+static u8 opcode_get_io_size(union code op)
+{ /* Map an opcode to its I/O size: 8, 16 or 32 (presumably the access width in bits). */
+       switch (op.i.op) {
+       case OP_LB:
+       case OP_LBU:
+       case OP_SB:
+               return 8;
+       case OP_LH:
+       case OP_LHU:
+       case OP_SH:
+               return 16;
+       default: /* no check that op is a load/store: every other opcode yields 32 */
+               return 32;
+       }
+}
+
 bool opcode_is_io(union code op)
 {
        return opcode_is_load(op) || opcode_is_store(op);
@@ -601,10 +626,48 @@ static u32 lightrec_propagate_consts(const struct opcode *op,
                                known &= ~BIT(c.r.rd);
                        }
                        break;
+               case OP_SPECIAL_MULT:
+               case OP_SPECIAL_MULTU:
+               case OP_SPECIAL_DIV:
+               case OP_SPECIAL_DIVU:
+                       if (OPT_FLAG_MULT_DIV && c.r.rd)
+                               known &= ~BIT(c.r.rd);
+                       if (OPT_FLAG_MULT_DIV && c.r.imm)
+                               known &= ~BIT(c.r.imm);
+                       break;
                default:
                        break;
                }
                break;
+       case OP_META_MULT2:
+       case OP_META_MULTU2:
+               if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
+                       if (c.r.rd) {
+                               known |= BIT(c.r.rd);
+
+                               if (c.r.op < 32)
+                                       v[c.r.rd] = v[c.r.rs] << c.r.op;
+                               else
+                                       v[c.r.rd] = 0;
+                       }
+
+                       if (c.r.imm) {
+                               known |= BIT(c.r.imm);
+
+                               if (c.r.op >= 32)
+                                       v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
+                               else if (c.i.op == OP_META_MULT2)
+                                       v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
+                               else
+                                       v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
+                       }
+               } else {
+                       if (OPT_FLAG_MULT_DIV && c.r.rd)
+                               known &= ~BIT(c.r.rd);
+                       if (OPT_FLAG_MULT_DIV && c.r.imm)
+                               known &= ~BIT(c.r.imm);
+               }
+               break;
        case OP_REGIMM:
                break;
        case OP_ADDI:
@@ -911,7 +974,8 @@ static int lightrec_transform_branches(struct lightrec_state *state,
                                op->i.imm = offset;
 
                        }
-               default: /* fall-through */
+                       fallthrough;
+               default:
                        break;
                }
        }
@@ -919,6 +983,11 @@ static int lightrec_transform_branches(struct lightrec_state *state,
        return 0;
 }
 
+static inline bool is_power_of_two(u32 value)
+{ /* True when popcount32(value) is exactly 1 (assuming popcount32() counts set bits, 0 is rejected). */
+       return popcount32(value) == 1;
+}
+
 static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list = block->opcode_list;
@@ -926,6 +995,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
        u32 known = BIT(0);
        u32 values[32] = { 0 };
        unsigned int i;
+       u8 tmp;
 
        for (i = 0; i < block->nb_ops; i++) {
                prev = op;
@@ -1000,6 +1070,28 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
                                        op->r.rs = op->r.rt;
                                }
                                break;
+                       case OP_SPECIAL_MULT:
+                       case OP_SPECIAL_MULTU:
+                               if ((known & BIT(op->r.rs)) &&
+                                   is_power_of_two(values[op->r.rs])) {
+                                       tmp = op->c.i.rs;
+                                       op->c.i.rs = op->c.i.rt;
+                                       op->c.i.rt = tmp;
+                               } else if (!(known & BIT(op->r.rt)) ||
+                                          !is_power_of_two(values[op->r.rt])) {
+                                       break;
+                               }
+
+                               pr_debug("Multiply by power-of-two: %u\n",
+                                        values[op->r.rt]);
+
+                               if (op->r.op == OP_SPECIAL_MULT)
+                                       op->i.op = OP_META_MULT2;
+                               else
+                                       op->i.op = OP_META_MULTU2;
+
+                               op->r.op = ffs32(values[op->r.rt]);
+                               break;
                        case OP_SPECIAL_OR:
                        case OP_SPECIAL_ADD:
                        case OP_SPECIAL_ADDU:
@@ -1028,6 +1120,64 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl
        return 0;
 }
 
+/*
+ * Decide whether the branch or jump 'op' may trade places with 'next_op',
+ * the opcode located in its delay slot.
+ *
+ * The swap is refused (false is returned) when 'next_op':
+ *  - reads or writes rd of a JALR, or register 31 for JAL, BLTZAL and BGEZAL;
+ *  - writes rs of a JR or JALR;
+ *  - writes rs or rt of a BEQ / BNE, or rs of a BLEZ, BGTZ, BLTZ, BGEZ,
+ *    BLTZAL or BGEZAL. Register number 0 is exempt from these last checks.
+ *
+ * Any other opcode, J included, yields true.
+ */
+static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
+{
+       switch (op.i.op) {
+       case OP_SPECIAL:
+               switch (op.r.op) {
+               case OP_SPECIAL_JALR:
+                       /* The delay slot may neither read nor write rd */
+                       if (opcode_reads_register(next_op, op.r.rd) ||
+                           opcode_writes_register(next_op, op.r.rd))
+                               return false;
+                       fallthrough;
+               case OP_SPECIAL_JR:
+                       /* The delay slot must not overwrite rs */
+                       if (opcode_writes_register(next_op, op.r.rs))
+                               return false;
+                       fallthrough;
+               default:
+                       break;
+               }
+               fallthrough;
+       case OP_J:
+               break;
+       case OP_JAL:
+               /* Register 31 must stay untouched and unread by the delay slot */
+               if (opcode_reads_register(next_op, 31) ||
+                   opcode_writes_register(next_op, 31))
+                       return false;
+
+               break;
+       case OP_BEQ:
+       case OP_BNE:
+               /* Register 0 is skipped: only a non-zero rt is checked */
+               if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
+                       return false;
+               fallthrough;
+       case OP_BLEZ:
+       case OP_BGTZ:
+               if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
+                       return false;
+               break;
+       case OP_REGIMM:
+               switch (op.r.rt) {
+               case OP_REGIMM_BLTZAL:
+               case OP_REGIMM_BGEZAL:
+                       if (opcode_reads_register(next_op, 31) ||
+                           opcode_writes_register(next_op, 31))
+                               return false;
+                       fallthrough;
+               case OP_REGIMM_BLTZ:
+               case OP_REGIMM_BGEZ:
+                       if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
+                               return false;
+                       break;
+               default:
+                       /* Other REGIMM opcodes impose no restriction here */
+                       break;
+               }
+               fallthrough;
+       default:
+               break;
+       }
+
+       return true;
+}
+
 static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
 {
        struct opcode *list, *next = &block->opcode_list[0];
@@ -1050,71 +1200,20 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc
                    !op_flag_no_ds(block->opcode_list[i - 1].flags))
                        continue;
 
-               if (op_flag_sync(list->flags) || op_flag_sync(next->flags))
+               if (op_flag_sync(next->flags))
                        continue;
 
-               switch (list->i.op) {
-               case OP_SPECIAL:
-                       switch (op.r.op) {
-                       case OP_SPECIAL_JALR:
-                               if (opcode_reads_register(next_op, op.r.rd) ||
-                                   opcode_writes_register(next_op, op.r.rd))
-                                       continue;
-                               fallthrough;
-                       case OP_SPECIAL_JR:
-                               if (opcode_writes_register(next_op, op.r.rs))
-                                       continue;
-                               fallthrough;
-                       default:
-                               break;
-                       }
-                       fallthrough;
-               case OP_J:
-                       break;
-               case OP_JAL:
-                       if (opcode_reads_register(next_op, 31) ||
-                           opcode_writes_register(next_op, 31))
-                               continue;
-                       else
-                               break;
-               case OP_BEQ:
-               case OP_BNE:
-                       if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
-                               continue;
-                       fallthrough;
-               case OP_BLEZ:
-               case OP_BGTZ:
-                       if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
-                               continue;
-                       break;
-               case OP_REGIMM:
-                       switch (op.r.rt) {
-                       case OP_REGIMM_BLTZAL:
-                       case OP_REGIMM_BGEZAL:
-                               if (opcode_reads_register(next_op, 31) ||
-                                   opcode_writes_register(next_op, 31))
-                                       continue;
-                               fallthrough;
-                       case OP_REGIMM_BLTZ:
-                       case OP_REGIMM_BGEZ:
-                               if (op.i.rs &&
-                                   opcode_writes_register(next_op, op.i.rs))
-                                       continue;
-                               break;
-                       }
-                       fallthrough;
-               default:
-                       break;
-               }
+               if (!lightrec_can_switch_delay_slot(list->c, next_op))
+                       continue;
 
                pr_debug("Swap branch and delay slot opcodes "
                         "at offsets 0x%x / 0x%x\n",
                         i << 2, (i + 1) << 2);
 
-               flags = next->flags;
+               flags = next->flags | (list->flags & LIGHTREC_SYNC);
                list->c = next_op;
                next->c = op;
-               next->flags = list->flags | LIGHTREC_NO_DS;
+               next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
                list->flags = flags | LIGHTREC_NO_DS;
        }
 
@@ -1123,7 +1222,7 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc
 
 static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
 {
-       struct opcode *list;
+       struct opcode_list *list, *old_list;
 
        if (new_size >= block->nb_ops) {
                pr_err("Invalid shrink size (%u vs %u)\n",
@@ -1131,19 +1230,20 @@ static int shrink_opcode_list(struct lightrec_state *state, struct block *block,
                return -EINVAL;
        }
 
-
        list = lightrec_malloc(state, MEM_FOR_IR,
-                              sizeof(*list) * new_size);
+                              sizeof(*list) + sizeof(struct opcode) * new_size);
        if (!list) {
                pr_err("Unable to allocate memory\n");
                return -ENOMEM;
        }
 
-       memcpy(list, block->opcode_list, sizeof(*list) * new_size);
+       old_list = container_of(block->opcode_list, struct opcode_list, ops);
+       memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);
 
-       lightrec_free_opcode_list(state, block);
-       block->opcode_list = list;
+       lightrec_free_opcode_list(state, block->opcode_list);
+       list->nb_ops = new_size;
        block->nb_ops = new_size;
+       block->opcode_list = list->ops;
 
        pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
                 block->pc, new_size);
@@ -1449,6 +1549,7 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
        u32 values[32] = { 0 };
        unsigned int i;
        u32 val, kunseg_val;
+       bool no_mask;
 
        for (i = 0; i < block->nb_ops; i++) {
                prev = list;
@@ -1483,7 +1584,7 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
                                    kunseg(values[list->i.rs]) < (kunseg(block->pc) +
                                                                  block->nb_ops * 4)) {
                                        pr_debug("Self-modifying block detected\n");
-                                       block->flags |= BLOCK_NEVER_COMPILE;
+                                       block_set_flags(block, BLOCK_NEVER_COMPILE);
                                        list->flags |= LIGHTREC_SMC;
                                }
                        }
@@ -1505,10 +1606,11 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
                                psx_map = lightrec_get_map_idx(state, kunseg_val);
 
                                list->flags &= ~LIGHTREC_IO_MASK;
+                               no_mask = val == kunseg_val;
 
                                switch (psx_map) {
                                case PSX_MAP_KERNEL_USER_RAM:
-                                       if (val == kunseg_val)
+                                       if (no_mask)
                                                list->flags |= LIGHTREC_NO_MASK;
                                        fallthrough;
                                case PSX_MAP_MIRROR1:
@@ -1516,19 +1618,36 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block)
                                case PSX_MAP_MIRROR3:
                                        pr_debug("Flaging opcode %u as RAM access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
+                                       if (no_mask && state->mirrors_mapped)
+                                               list->flags |= LIGHTREC_NO_MASK;
                                        break;
                                case PSX_MAP_BIOS:
                                        pr_debug("Flaging opcode %u as BIOS access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
+                                       if (no_mask)
+                                               list->flags |= LIGHTREC_NO_MASK;
                                        break;
                                case PSX_MAP_SCRATCH_PAD:
                                        pr_debug("Flaging opcode %u as scratchpad access\n", i);
                                        list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
+                                       if (no_mask)
+                                               list->flags |= LIGHTREC_NO_MASK;
 
                                        /* Consider that we're never going to run code from
                                         * the scratchpad. */
                                        list->flags |= LIGHTREC_NO_INVALIDATE;
                                        break;
+                               case PSX_MAP_HW_REGISTERS:
+                                       if (state->ops.hw_direct &&
+                                           state->ops.hw_direct(kunseg_val,
+                                                                opcode_is_store(list->c),
+                                                                opcode_get_io_size(list->c))) {
+                                               pr_debug("Flagging opcode %u as direct I/O access\n",
+                                                        i);
+                                               list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
+                                               break;
+                                       }
+                                       fallthrough;
                                default:
                                        pr_debug("Flagging opcode %u as I/O access\n",
                                                 i);
@@ -1591,6 +1710,9 @@ static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset,
                        }
 
                        return mflo ? REG_LO : REG_HI;
+               case OP_META_MULT2:
+               case OP_META_MULTU2:
+                       return 0;
                case OP_SPECIAL:
                        switch (op->r.op) {
                        case OP_SPECIAL_MULT:
@@ -1736,20 +1858,26 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *
                if (prev)
                        known = lightrec_propagate_consts(list, prev, known, values);
 
-               if (list->i.op != OP_SPECIAL)
-                       continue;
-
-               switch (list->r.op) {
-               case OP_SPECIAL_DIV:
-               case OP_SPECIAL_DIVU:
-                       /* If we are dividing by a non-zero constant, don't
-                        * emit the div-by-zero check. */
-                       if (lightrec_always_skip_div_check() ||
-                           (known & BIT(list->c.r.rt) && values[list->c.r.rt]))
-                               list->flags |= LIGHTREC_NO_DIV_CHECK;
+               switch (list->i.op) {
+               case OP_SPECIAL:
+                       switch (list->r.op) {
+                       case OP_SPECIAL_DIV:
+                       case OP_SPECIAL_DIVU:
+                               /* If we are dividing by a non-zero constant, don't
+                                * emit the div-by-zero check. */
+                               if (lightrec_always_skip_div_check() ||
+                                   ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
+                                       list->flags |= LIGHTREC_NO_DIV_CHECK;
+                               fallthrough;
+                       case OP_SPECIAL_MULT:
+                       case OP_SPECIAL_MULTU:
+                               break;
+                       default:
+                               continue;
+                       }
                        fallthrough;
-               case OP_SPECIAL_MULT:
-               case OP_SPECIAL_MULTU:
+               case OP_META_MULT2:
+               case OP_META_MULTU2:
                        break;
                default:
                        continue;
@@ -1929,7 +2057,8 @@ static int lightrec_replace_memset(struct lightrec_state *state, struct block *b
                if (i == ARRAY_SIZE(memset_code) - 1) {
                        /* success! */
                        pr_debug("Block at PC 0x%x is a memset\n", block->pc);
-                       block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE;
+                       block_set_flags(block,
+                                       BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
 
                        /* Return non-zero to skip other optimizers. */
                        return 1;
index 2e32cae..777b997 100644 (file)
@@ -24,8 +24,10 @@ struct reaper_elm {
 struct reaper {
        struct lightrec_state *state;
        pthread_mutex_t mutex;
+       pthread_cond_t cond; /* broadcast by lightrec_reaper_reap(), waited on by lightrec_reaper_pause() */
        struct slist_elm reap_list;
 
+       bool running; /* true while lightrec_reaper_reap() handles an element with the mutex released */
        atomic_uint sem;
 };
 
@@ -41,22 +43,36 @@ struct reaper *lightrec_reaper_init(struct lightrec_state *state)
        }
 
        reaper->state = state;
+       reaper->running = false;
        reaper->sem = 0;
        slist_init(&reaper->reap_list);
 
        ret = pthread_mutex_init(&reaper->mutex, NULL);
        if (ret) {
                pr_err("Cannot init mutex variable: %d\n", ret);
-               lightrec_free(reaper->state, MEM_FOR_LIGHTREC,
-                             sizeof(*reaper), reaper);
-               return NULL;
+               goto err_free_reaper;
+       }
+
+       ret = pthread_cond_init(&reaper->cond, NULL);
+       if (ret) {
+               pr_err("Cannot init cond variable: %d\n", ret);
+               goto err_destroy_mutex;
        }
 
        return reaper;
+
+err_destroy_mutex:
+       pthread_mutex_destroy(&reaper->mutex);
+err_free_reaper:
+       lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper);
+       return NULL;
 }
 
 void lightrec_reaper_destroy(struct reaper *reaper)
 {
+       lightrec_reaper_reap(reaper); /* run the reaper once more before the teardown below */
+
+       pthread_cond_destroy(&reaper->cond); /* counterpart of pthread_cond_init() in lightrec_reaper_init() */
        pthread_mutex_destroy(&reaper->mutex);
        lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper);
 }
@@ -108,6 +124,7 @@ void lightrec_reaper_reap(struct reaper *reaper)
        while (lightrec_reaper_can_reap(reaper) &&
               !!(elm = slist_first(&reaper->reap_list))) {
                slist_remove(&reaper->reap_list, elm);
+               reaper->running = true;
                pthread_mutex_unlock(&reaper->mutex);
 
                reaper_elm = container_of(elm, struct reaper_elm, slist);
@@ -118,6 +135,8 @@ void lightrec_reaper_reap(struct reaper *reaper)
                              sizeof(*reaper_elm), reaper_elm);
 
                pthread_mutex_lock(&reaper->mutex);
+               reaper->running = false;
+               pthread_cond_broadcast(&reaper->cond);
        }
 
        pthread_mutex_unlock(&reaper->mutex);
@@ -126,6 +145,11 @@ void lightrec_reaper_reap(struct reaper *reaper)
 void lightrec_reaper_pause(struct reaper *reaper)
 {
        atomic_fetch_add_explicit(&reaper->sem, 1, memory_order_relaxed);
+
+       pthread_mutex_lock(&reaper->mutex); /* 'running' is only read with the mutex held */
+       while (reaper->running) /* re-checked after every wakeup; cleared in lightrec_reaper_reap() */
+               pthread_cond_wait(&reaper->cond, &reaper->mutex);
+       pthread_mutex_unlock(&reaper->mutex);
 }
 
 void lightrec_reaper_continue(struct reaper *reaper)
index 7350adb..08a9235 100644 (file)
@@ -106,29 +106,20 @@ static bool lightrec_cancel_block_rec(struct recompiler *rec,
 static void lightrec_cancel_list(struct recompiler *rec)
 {
        struct block_rec *block_rec;
-       struct slist_elm *next;
-
-       while (!!(next = lightrec_get_first_elm(&rec->slist))) {
-               block_rec = container_of(next, struct block_rec, slist);
+       struct slist_elm *elm, *head = &rec->slist;
 
+       for (elm = slist_first(head); elm; elm = slist_first(head)) { /* always re-reads the head; presumably lightrec_cancel_block_rec() unlinks elm - verify */
+               block_rec = container_of(elm, struct block_rec, slist);
                lightrec_cancel_block_rec(rec, block_rec);
        }
-
-       pthread_cond_broadcast(&rec->cond2);
 }
 
 static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d)
 {
        struct recompiler *rec = d;
 
-       pthread_mutex_lock(&rec->mutex);
-
-       if (rec->must_flush) {
-               lightrec_remove_outdated_blocks(state->block_cache, NULL);
-               rec->must_flush = false;
-       }
-
-       pthread_mutex_unlock(&rec->mutex);
+       lightrec_remove_outdated_blocks(state->block_cache, NULL); /* now unconditional; rec->mutex is not taken in this function */
+       rec->must_flush = false; /* raised in lightrec_compile_list() right before this callback is queued */
 }
 
 static void lightrec_compile_list(struct recompiler *rec,
@@ -146,19 +137,23 @@ static void lightrec_compile_list(struct recompiler *rec,
 
                pthread_mutex_unlock(&rec->mutex);
 
-               if (likely(!(block->flags & BLOCK_IS_DEAD))) {
+               if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) {
                        ret = lightrec_compile_block(thd->cstate, block);
                        if (ret == -ENOMEM) {
                                /* Code buffer is full. Request the reaper to
                                 * flush it. */
 
                                pthread_mutex_lock(&rec->mutex);
+                               block_rec->compiling = false;
+                               pthread_cond_broadcast(&rec->cond2);
+
                                if (!rec->must_flush) {
+                                       rec->must_flush = true;
+                                       lightrec_cancel_list(rec);
+
                                        lightrec_reaper_add(rec->state->reaper,
                                                            lightrec_flush_code_buffer,
                                                            rec);
-                                       lightrec_cancel_list(rec);
-                                       rec->must_flush = true;
                                }
                                return;
                        }
@@ -174,7 +169,7 @@ static void lightrec_compile_list(struct recompiler *rec,
                slist_remove(&rec->slist, next);
                lightrec_free(rec->state, MEM_FOR_LIGHTREC,
                              sizeof(*block_rec), block_rec);
-               pthread_cond_signal(&rec->cond2);
+               pthread_cond_broadcast(&rec->cond2);
        }
 }
 
@@ -333,7 +328,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 
        /* If the block is marked as dead, don't compile it, it will be removed
         * as soon as it's safe. */
-       if (block->flags & BLOCK_IS_DEAD)
+       if (block_has_flag(block, BLOCK_IS_DEAD))
                goto out_unlock;
 
        for (elm = slist_first(&rec->slist), prev = NULL; elm;
@@ -345,7 +340,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
                         * it to the top of the list, unless the block is being
                         * recompiled. */
                        if (prev && !block_rec->compiling &&
-                           !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
+                           !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) {
                                slist_remove_next(prev);
                                slist_append(&rec->slist, elm);
                        }
@@ -356,7 +351,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 
        /* By the time this function was called, the block has been recompiled
          * and isn't in the wait list anymore. Just return here. */
-       if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE))
+       if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
                goto out_unlock;
 
        block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC,
@@ -375,7 +370,7 @@ int lightrec_recompiler_add(struct recompiler *rec, struct block *block)
 
        /* If the block is being recompiled, push it to the end of the queue;
         * otherwise push it to the front of the queue. */
-       if (block->flags & BLOCK_SHOULD_RECOMPILE)
+       if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
                for (; elm->next; elm = elm->next);
 
        slist_append(elm, &block_rec->slist);
@@ -419,31 +414,36 @@ out_unlock:
 void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
                                          struct block *block, u32 *pc)
 {
-       bool freed;
+       u8 old_flags;
 
        /* There's no point in running the first pass if the block will never
         * be compiled. Let the main loop run the interpreter instead. */
-       if (block->flags & BLOCK_NEVER_COMPILE)
+       if (block_has_flag(block, BLOCK_NEVER_COMPILE))
                return NULL;
 
+       /* The block is marked as dead, and will be removed the next time the
+        * reaper is run. In the meantime, the old function can still be
+        * executed. */
+       if (block_has_flag(block, BLOCK_IS_DEAD))
+               return block->function;
+
        /* If the block is already fully tagged, there is no point in running
         * the first pass. Request a recompilation of the block, and maybe the
         * interpreter will run the block in the meantime. */
-       if (block->flags & BLOCK_FULLY_TAGGED)
+       if (block_has_flag(block, BLOCK_FULLY_TAGGED))
                lightrec_recompiler_add(state->rec, block);
 
        if (likely(block->function)) {
-               if (block->flags & BLOCK_FULLY_TAGGED) {
-                       freed = atomic_flag_test_and_set(&block->op_list_freed);
+               if (block_has_flag(block, BLOCK_FULLY_TAGGED)) {
+                       old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
 
-                       if (!freed) {
+                       if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
                                pr_debug("Block PC 0x%08x is fully tagged"
                                         " - free opcode list\n", block->pc);
 
                                /* The block was already compiled but the opcode list
                                 * didn't get freed yet - do it now */
-                               lightrec_free_opcode_list(state, block);
-                               block->opcode_list = NULL;
+                               lightrec_free_opcode_list(state, block->opcode_list);
                        }
                }
 
@@ -452,23 +452,25 @@ void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
 
        /* Mark the opcode list as freed, so that the threaded compiler won't
         * free it while we're using it in the interpreter. */
-       freed = atomic_flag_test_and_set(&block->op_list_freed);
+       old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
 
        /* Block wasn't compiled yet - run the interpreter */
        *pc = lightrec_emulate_block(state, block, *pc);
 
-       if (!freed)
-               atomic_flag_clear(&block->op_list_freed);
+       if (!(old_flags & BLOCK_NO_OPCODE_LIST))
+               block_clear_flags(block, BLOCK_NO_OPCODE_LIST);
 
        /* The block got compiled while the interpreter was running.
         * We can free the opcode list now. */
-       if (block->function && (block->flags & BLOCK_FULLY_TAGGED) &&
-           !atomic_flag_test_and_set(&block->op_list_freed)) {
-               pr_debug("Block PC 0x%08x is fully tagged"
-                        " - free opcode list\n", block->pc);
+       if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) {
+               old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
 
-               lightrec_free_opcode_list(state, block);
-               block->opcode_list = NULL;
+               if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
+                       pr_debug("Block PC 0x%08x is fully tagged"
+                                " - free opcode list\n", block->pc);
+
+                       lightrec_free_opcode_list(state, block->opcode_list);
+               }
        }
 
        return NULL;
index 791a9c5..1f11d8a 100644 (file)
 #include <stdbool.h>
 #include <stddef.h>
 
+enum reg_priority { /* alloc_temp() and alloc_in_out() pick the unused register with the lowest value */
+       REG_IS_TEMP, /* set by lightrec_alloc_reg_temp(); lightrec_discard_nreg() resets prio to this value */
+       REG_IS_TEMP_VALUE, /* temporary tagged with a known value by lightrec_temp_set_value() */
+       REG_IS_ZERO, /* set by lightrec_alloc_reg_out() when the emulated register is 0 */
+       REG_IS_LOADED, /* set by lightrec_alloc_reg() and, for non-zero registers, lightrec_alloc_reg_out() */
+       REG_IS_DIRTY, /* lightrec_unload_nreg() treats this priority as "store the old value back" */
+
+       REG_NB_PRIORITIES, /* number of priorities; the allocators start their search with it as "best" */
+};
+
 struct native_register {
-       bool used, loaded, dirty, output, extend, extended,
+       bool used, output, extend, extended,
             zero_extend, zero_extended, locked;
        s8 emulated_register;
+       intptr_t value; /* written by lightrec_temp_set_value(), matched by lightrec_get_reg_with_value() */
+       enum reg_priority prio; /* allocation priority; takes over from the removed loaded/dirty booleans */
 };
 
 struct regcache {
@@ -69,7 +81,11 @@ static inline u8 lightrec_reg_to_lightning(const struct regcache *cache,
                const struct native_register *nreg)
 {
        u8 offset = lightrec_reg_number(cache, nreg);
-       return offset < NUM_REGS ? JIT_V(offset) : JIT_R(offset - NUM_REGS);
+
+       if (offset < NUM_REGS)
+               return JIT_V(FIRST_REG + offset);
+       else
+               return JIT_R(FIRST_TEMP + offset - NUM_REGS);
 }
 
 static inline struct native_register * lightning_reg_to_lightrec(
@@ -78,14 +94,14 @@ static inline struct native_register * lightning_reg_to_lightrec(
        if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) ||
                        (JIT_V0 < JIT_R0 && reg < JIT_R0)) {
                if (JIT_V1 > JIT_V0)
-                       return &cache->lightrec_regs[reg - JIT_V0];
+                       return &cache->lightrec_regs[reg - JIT_V(FIRST_REG)];
                else
-                       return &cache->lightrec_regs[JIT_V0 - reg];
+                       return &cache->lightrec_regs[JIT_V(FIRST_REG) - reg];
        } else {
                if (JIT_R1 > JIT_R0)
-                       return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0];
+                       return &cache->lightrec_regs[NUM_REGS + reg - JIT_R(FIRST_TEMP)];
                else
-                       return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg];
+                       return &cache->lightrec_regs[NUM_REGS + JIT_R(FIRST_TEMP) - reg];
        }
 }
 
@@ -119,6 +135,8 @@ void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags)
 
 static struct native_register * alloc_temp(struct regcache *cache)
 {
+       struct native_register *elm, *nreg = NULL;
+       enum reg_priority best = REG_NB_PRIORITIES;
        unsigned int i;
 
        /* We search the register list in reverse order. As temporaries are
@@ -126,18 +144,18 @@ static struct native_register * alloc_temp(struct regcache *cache)
         * caller-saved registers, as they won't have to be saved back to
         * memory. */
        for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) {
-               struct native_register *nreg = &cache->lightrec_regs[i - 1];
-               if (!nreg->used && !nreg->loaded && !nreg->dirty)
-                       return nreg;
-       }
+               elm = &cache->lightrec_regs[i - 1];
 
-       for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) {
-               struct native_register *nreg = &cache->lightrec_regs[i - 1];
-               if (!nreg->used)
-                       return nreg;
+               if (!elm->used && elm->prio < best) {
+                       nreg = elm;
+                       best = elm->prio;
+
+                       if (best == REG_IS_TEMP)
+                               break;
+               }
        }
 
-       return NULL;
+       return nreg;
 }
 
 static struct native_register * find_mapped_reg(struct regcache *cache,
@@ -147,9 +165,9 @@ static struct native_register * find_mapped_reg(struct regcache *cache,
 
        for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
                struct native_register *nreg = &cache->lightrec_regs[i];
-               if ((!reg || nreg->loaded || nreg->dirty) &&
-                               nreg->emulated_register == reg &&
-                               (!out || !nreg->locked))
+               if ((nreg->prio >= REG_IS_ZERO) &&
+                   nreg->emulated_register == reg &&
+                   (!out || !nreg->locked))
                        return nreg;
        }
 
@@ -159,7 +177,8 @@ static struct native_register * find_mapped_reg(struct regcache *cache,
 static struct native_register * alloc_in_out(struct regcache *cache,
                                             u8 reg, bool out)
 {
-       struct native_register *nreg;
+       struct native_register *elm, *nreg = NULL;
+       enum reg_priority best = REG_NB_PRIORITIES;
        unsigned int i;
 
        /* Try to find if the register is already mapped somewhere */
@@ -167,48 +186,39 @@ static struct native_register * alloc_in_out(struct regcache *cache,
        if (nreg)
                return nreg;
 
-       /* Try to allocate a non-dirty, non-loaded register.
-        * Loaded registers may be re-used later, so it's better to avoid
-        * re-using one if possible. */
-       for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
-               nreg = &cache->lightrec_regs[i];
-               if (!nreg->used && !nreg->dirty && !nreg->loaded)
-                       return nreg;
-       }
+       nreg = NULL;
 
-       /* Try to allocate a non-dirty register */
        for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
-               nreg = &cache->lightrec_regs[i];
-               if (!nreg->used && !nreg->dirty)
-                       return nreg;
-       }
+               elm = &cache->lightrec_regs[i];
 
-       for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
-               nreg = &cache->lightrec_regs[i];
-               if (!nreg->used)
-                       return nreg;
+               if (!elm->used && elm->prio < best) {
+                       nreg = elm;
+                       best = elm->prio;
+
+                       if (best == REG_IS_TEMP)
+                               break;
+               }
        }
 
-       return NULL;
+       return nreg;
 }
 
+/*
+ * Reset a native register to its unallocated state: the extension, output,
+ * used and locked flags are cleared, the register is detached from any
+ * emulated register (-1), and its priority drops to REG_IS_TEMP, the lowest
+ * one, which is the first the allocators settle for.
+ */
 static void lightrec_discard_nreg(struct native_register *nreg)
 {
        nreg->extended = false;
        nreg->zero_extended = false;
-       nreg->loaded = false;
        nreg->output = false;
-       nreg->dirty = false;
        nreg->used = false;
        nreg->locked = false;
        nreg->emulated_register = -1;
+       nreg->prio = REG_IS_TEMP;
 }
 
 static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit,
                struct native_register *nreg, u8 jit_reg)
 {
        /* If we get a dirty register, store back the old value */
-       if (nreg->dirty) {
+       if (nreg->prio == REG_IS_DIRTY) {
                s16 offset = offsetof(struct lightrec_state, regs.gpr)
                        + (nreg->emulated_register << 2);
 
@@ -253,6 +263,7 @@ u8 lightrec_alloc_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
        lightrec_unload_nreg(cache, _jit, reg, jit_reg);
 
        reg->used = true;
+       reg->prio = REG_IS_LOADED;
        return jit_reg;
 }
 
@@ -269,10 +280,38 @@ u8 lightrec_alloc_reg_temp(struct regcache *cache, jit_state_t *_jit)
        jit_reg = lightrec_reg_to_lightning(cache, nreg);
        lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
 
+       nreg->prio = REG_IS_TEMP;
        nreg->used = true;
        return jit_reg;
 }
 
+s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value) /* Look up a cached register already tagged with 'value' */
+{
+       struct native_register *nreg;
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) { /* linear scan over every entry of the register cache */
+               nreg = &cache->lightrec_regs[i];
+
+               if (nreg->prio == REG_IS_TEMP_VALUE && nreg->value == value) { /* nreg->value is only compared for entries in the REG_IS_TEMP_VALUE state */
+                       nreg->used = true; /* mark the entry as in use before handing it out */
+                       return lightrec_reg_to_lightning(cache, nreg); /* first match wins; returns the JIT register id of that entry */
+               }
+       }
+
+       return -1; /* no entry matched: callers must check for a negative result */
+}
+
+void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value) /* Tag the cache entry of 'jit_reg' with 'value' */
+{
+       struct native_register *nreg;
+
+       nreg = lightning_reg_to_lightrec(cache, jit_reg); /* cache entry backing jit_reg; the result is dereferenced without a check */
+
+       nreg->prio = REG_IS_TEMP_VALUE; /* this state is what lightrec_get_reg_with_value() tests for */
+       nreg->value = value; /* the value later compared against by lightrec_get_reg_with_value() */
+}
+
 u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
                          u8 reg, u8 flags)
 {
@@ -303,6 +342,7 @@ u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
        nreg->emulated_register = reg;
        nreg->extend = flags & REG_EXT;
        nreg->zero_extend = flags & REG_ZEXT;
+       nreg->prio = reg ? REG_IS_LOADED : REG_IS_ZERO;
        return jit_reg;
 }
 
@@ -333,7 +373,7 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
        if (reg_changed)
                lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
 
-       if (!nreg->loaded && !nreg->dirty && reg != 0) {
+       if (nreg->prio < REG_IS_LOADED && reg != 0) {
                s16 offset = offsetof(struct lightrec_state, regs.gpr)
                        + (reg << 2);
 
@@ -346,15 +386,15 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
                else
                        jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
 
-               nreg->loaded = true;
+               nreg->prio = REG_IS_LOADED;
        }
 
        /* Clear register r0 before use */
-       if (reg == 0 && (!nreg->loaded || nreg->dirty)) {
+       if (reg == 0 && nreg->prio != REG_IS_ZERO) {
                jit_movi(jit_reg, 0);
                nreg->extended = true;
                nreg->zero_extended = true;
-               nreg->loaded = true;
+               nreg->prio = REG_IS_ZERO;
        }
 
        nreg->used = true;
@@ -399,8 +439,8 @@ u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
        nreg->extended = true;
        nreg->zero_extended = false;
        nreg->used = true;
-       nreg->loaded = true;
        nreg->emulated_register = reg;
+       nreg->prio = REG_IS_LOADED;
 
        return jit_reg;
 }
@@ -409,7 +449,7 @@ static void free_reg(struct native_register *nreg)
 {
        /* Set output registers as dirty */
        if (nreg->used && nreg->output && nreg->emulated_register > 0)
-               nreg->dirty = true;
+               nreg->prio = REG_IS_DIRTY;
        if (nreg->output) {
                nreg->extended = nreg->extend;
                nreg->zero_extended = nreg->zero_extend;
@@ -434,13 +474,18 @@ void lightrec_free_regs(struct regcache *cache)
 static void clean_reg(jit_state_t *_jit,
                struct native_register *nreg, u8 jit_reg, bool clean)
 {
-       if (nreg->dirty) {
+       if (nreg->prio == REG_IS_DIRTY) { /* only dirty entries are written back; anything else is left untouched */
                s16 offset = offsetof(struct lightrec_state, regs.gpr)
                        + (nreg->emulated_register << 2);
 
                jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
-               nreg->loaded |= nreg->dirty;
-               nreg->dirty ^= clean;
+
+               if (clean) { /* with clean == false the entry keeps REG_IS_DIRTY even though the store was emitted */
+                       if (nreg->emulated_register == 0)
+                               nreg->prio = REG_IS_ZERO; /* entry mirrors emulated register 0 */
+                       else
+                               nreg->prio = REG_IS_LOADED; /* value was just stored back, so the entry is no longer dirty */
+               }
        }
 }
 
@@ -448,11 +493,13 @@ static void clean_regs(struct regcache *cache, jit_state_t *_jit, bool clean)
 {
        unsigned int i;
 
-       for (i = 0; i < NUM_REGS; i++)
-               clean_reg(_jit, &cache->lightrec_regs[i], JIT_V(i), clean);
+       for (i = 0; i < NUM_REGS; i++) {
+               clean_reg(_jit, &cache->lightrec_regs[i],
+                         JIT_V(FIRST_REG + i), clean);
+       }
        for (i = 0; i < NUM_TEMPS; i++) {
                clean_reg(_jit, &cache->lightrec_regs[i + NUM_REGS],
-                               JIT_R(i), clean);
+                               JIT_R(FIRST_TEMP + i), clean);
        }
 }
 
@@ -466,6 +513,17 @@ void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit)
        clean_regs(cache, _jit, true);
 }
 
+bool lightrec_has_dirty_regs(struct regcache *cache) /* Report whether any cached register is in the REG_IS_DIRTY state */
+{
+       unsigned int i;
+
+       for (i = 0; i < NUM_REGS + NUM_TEMPS; i++) /* walks the NUM_REGS entries followed by the NUM_TEMPS entries */
+               if (cache->lightrec_regs[i].prio == REG_IS_DIRTY)
+                       return true; /* a single dirty entry is enough */
+
+       return false; /* no entry is dirty; the cache is not modified by this query */
+}
+
 void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
 {
        struct native_register *reg;
@@ -557,15 +615,18 @@ void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit)
        for (i = 0; i < NUM_REGS; i++) {
                nreg = &cache->lightrec_regs[i];
 
-               if (nreg->used || nreg->loaded || nreg->dirty)
-                       jit_live(JIT_V(i));
+               if (nreg->used || nreg->prio > REG_IS_TEMP)
+                       jit_live(JIT_V(FIRST_REG + i));
        }
 #endif
 
        for (i = 0; i < NUM_TEMPS; i++) {
                nreg = &cache->lightrec_regs[NUM_REGS + i];
 
-               if (nreg->used || nreg->loaded || nreg->dirty)
-                       jit_live(JIT_R(i));
+               if (nreg->used || nreg->prio > REG_IS_TEMP)
+                       jit_live(JIT_R(FIRST_TEMP + i));
        }
+
+       jit_live(LIGHTREC_REG_STATE);
+       jit_live(LIGHTREC_REG_CYCLE);
 }
index 5aa5050..cffbf05 100644 (file)
@@ -6,12 +6,25 @@
 #ifndef __REGCACHE_H__
 #define __REGCACHE_H__
 
-#include "lightrec-private.h"
+#include "lightning-wrapper.h"
 
-#define NUM_REGS (JIT_V_NUM - 2)
-#define NUM_TEMPS (JIT_R_NUM)
+#define NUM_REGS (JIT_V_NUM - 1)
 #define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
-#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2))
+
+#if defined(__powerpc__)
+#  define NUM_TEMPS JIT_R_NUM /* every JIT_R register is usable as a temporary on this target */
+/* JIT_R0 is callee-saved on PowerPC, we have to use something else */
+#  define LIGHTREC_REG_CYCLE _R10
+#  define FIRST_TEMP 0 /* temporaries are addressed as JIT_R(FIRST_TEMP + i), i.e. starting at JIT_R(0) */
+#else
+#  define NUM_TEMPS (JIT_R_NUM - 1) /* one JIT_R register fewer: JIT_R0 is taken by LIGHTREC_REG_CYCLE below */
+#  define LIGHTREC_REG_CYCLE JIT_R0
+#  define FIRST_TEMP 1 /* skip JIT_R0 so temporaries start at JIT_R(1) */
+#endif
+
+#include "lightrec-private.h"
+
+#define FIRST_REG 0
 
 /* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. */
 #define REG_EXT                BIT(0) /* register is sign-extended */
@@ -35,6 +48,9 @@ u8 lightrec_alloc_reg_in(struct regcache *cache, jit_state_t *_jit,
 u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
                           u8 reg, u8 jit_reg);
 
+s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value);
+void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value);
+
 u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg);
 void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags);
 
@@ -47,6 +63,7 @@ void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
 void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit);
 void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
 void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit);
+_Bool lightrec_has_dirty_regs(struct regcache *cache);
 
 void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
                                  u8 reg, _Bool unload);
index e7afc5c..23015a4 100644 (file)
@@ -24,6 +24,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
+#include <pthread.h>
 
 #if defined(__hpux) && defined(__hppa__)
 #  include <machine/param.h>
@@ -913,6 +914,10 @@ typedef enum {
 #define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
 #endif
 
+    jit_code_casr,             jit_code_casi,
+#define jit_casr(u, v, w, x)   jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x)   jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
     jit_code_last_code
 } jit_code_t;
 
@@ -1081,6 +1086,10 @@ extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t,
 extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
                                     jit_int32_t, jit_int32_t,
                                     jit_word_t, jit_word_t);
+#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h)
+extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
+                                    jit_word_t, jit_word_t,
+                                    jit_int32_t, jit_int32_t);
 #define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
 extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
                                     jit_word_t, jit_word_t, jit_float32_t);
index 52d37f0..13c11e3 100644 (file)
@@ -53,8 +53,6 @@ static char *name = "retroarch.exe";
 
 static bool use_lightrec_interpreter;
 static bool use_pcsx_interpreter;
-static bool lightrec_debug;
-static bool lightrec_very_debug;
 static bool booting;
 static u32 lightrec_begin_cycles;
 
@@ -305,9 +303,86 @@ static void lightrec_enable_ram(struct lightrec_state *state, bool enable)
                memcpy(cache_buf, psxM, sizeof(cache_buf));
 }
 
+static bool lightrec_can_hw_direct(u32 kaddr, bool is_write, u8 size) /* Predicate installed as lightrec_ops.hw_direct */
+{ /* NOTE(review): presumably 'true' means the access at kaddr may bypass the I/O handlers - confirm against the Lightrec API docs */
+       switch (size) { /* 8 and 16 are handled explicitly; any other size falls into the default branch (presumably 32-bit accesses - confirm) */
+       case 8:
+               switch (kaddr) {
+               case 0x1f801040:
+               case 0x1f801050:
+               case 0x1f801800:
+               case 0x1f801801:
+               case 0x1f801802:
+               case 0x1f801803:
+                       return false; /* false for both reads and writes */
+               default:
+                       return true; /* is_write is not consulted for size 8 */
+               }
+       case 16:
+               switch (kaddr) {
+               case 0x1f801040:
+               case 0x1f801044:
+               case 0x1f801048:
+               case 0x1f80104a:
+               case 0x1f80104e:
+               case 0x1f801050:
+               case 0x1f801054:
+               case 0x1f80105a:
+               case 0x1f80105e:
+               case 0x1f801100:
+               case 0x1f801104:
+               case 0x1f801108:
+               case 0x1f801110:
+               case 0x1f801114:
+               case 0x1f801118:
+               case 0x1f801120:
+               case 0x1f801124:
+               case 0x1f801128:
+                       return false; /* false for both reads and writes */
+               case 0x1f801070:
+               case 0x1f801074:
+                       return !is_write; /* true for reads, false for writes */
+               default:
+                       return is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00; /* writes: always true; reads: true only outside [0x1f801c00, 0x1f801e00) */
+               }
+       default: /* NOTE(review): below, the range check gates writes, whereas the 16-bit case above gates reads - confirm the asymmetry is intended */
+               switch (kaddr) {
+               case 0x1f801040:
+               case 0x1f801050:
+               case 0x1f801100:
+               case 0x1f801104:
+               case 0x1f801108:
+               case 0x1f801110:
+               case 0x1f801114:
+               case 0x1f801118:
+               case 0x1f801120:
+               case 0x1f801124:
+               case 0x1f801128:
+               case 0x1f801810:
+               case 0x1f801814:
+               case 0x1f801820:
+               case 0x1f801824:
+                       return false; /* false for both reads and writes */
+               case 0x1f801070:
+               case 0x1f801074:
+               case 0x1f801088:
+               case 0x1f801098:
+               case 0x1f8010a8:
+               case 0x1f8010b8:
+               case 0x1f8010c8:
+               case 0x1f8010e8:
+               case 0x1f8010f4:
+                       return !is_write; /* true for reads, false for writes */
+               default:
+                       return !is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00; /* reads: always true; writes: true only outside [0x1f801c00, 0x1f801e00) (presumably the SPU register window - confirm) */
+               }
+       }
+}
+
 static const struct lightrec_ops lightrec_ops = {
        .cop2_op = cop2_op,
        .enable_ram = lightrec_enable_ram,
+       .hw_direct = lightrec_can_hw_direct,
 };
 
 static int lightrec_plugin_init(void)
@@ -321,11 +396,10 @@ static int lightrec_plugin_init(void)
                lightrec_map[PSX_MAP_MIRROR1].address = psxM + 0x200000;
                lightrec_map[PSX_MAP_MIRROR2].address = psxM + 0x400000;
                lightrec_map[PSX_MAP_MIRROR3].address = psxM + 0x600000;
+               lightrec_map[PSX_MAP_HW_REGISTERS].address = psxH + 0x1000;
                lightrec_map[PSX_MAP_CODE_BUFFER].address = code_buffer;
        }
 
-       lightrec_debug = !!getenv("LIGHTREC_DEBUG");
-       lightrec_very_debug = !!getenv("LIGHTREC_VERY_DEBUG");
        use_lightrec_interpreter = !!getenv("LIGHTREC_INTERPRETER");
        if (getenv("LIGHTREC_BEGIN_CYCLES"))
          lightrec_begin_cycles = (unsigned int) strtol(
@@ -347,90 +421,6 @@ static int lightrec_plugin_init(void)
        return 0;
 }
 
-static u32 hash_calculate_le(const void *buffer, u32 count)
-{
-       unsigned int i;
-       u32 *data = (u32 *) buffer;
-       u32 hash = 0xffffffff;
-
-       count /= 4;
-       for(i = 0; i < count; ++i) {
-               hash += LE32TOH(data[i]);
-               hash += (hash << 10);
-               hash ^= (hash >> 6);
-       }
-
-       hash += (hash << 3);
-       hash ^= (hash >> 11);
-       hash += (hash << 15);
-       return hash;
-}
-
-static u32 hash_calculate(const void *buffer, u32 count)
-{
-       unsigned int i;
-       u32 *data = (u32 *) buffer;
-       u32 hash = 0xffffffff;
-
-       count /= 4;
-       for(i = 0; i < count; ++i) {
-               hash += data[i];
-               hash += (hash << 10);
-               hash ^= (hash >> 6);
-       }
-
-       hash += (hash << 3);
-       hash ^= (hash >> 11);
-       hash += (hash << 15);
-       return hash;
-}
-
-static const char * const mips_regs[] = {
-       "zero",
-       "at",
-       "v0", "v1",
-       "a0", "a1", "a2", "a3",
-       "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
-       "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-       "t8", "t9",
-       "k0", "k1",
-       "gp", "sp", "fp", "ra",
-       "lo", "hi",
-};
-
-static void print_for_big_ass_debugger(void)
-{
-       unsigned int i;
-
-       printf("CYCLE 0x%08x PC 0x%08x", psxRegs.cycle, psxRegs.pc);
-
-       if (lightrec_very_debug)
-               printf(" RAM 0x%08x SCRATCH 0x%08x HW 0x%08x",
-                               hash_calculate_le(psxM, 0x200000),
-                               hash_calculate_le(psxH, 0x400),
-                               hash_calculate_le(psxH + 0x1000, 0x2000));
-
-       printf(" CP0 0x%08x CP2D 0x%08x CP2C 0x%08x INT 0x%04x INTCYCLE 0x%08x GPU 0x%08x",
-                       hash_calculate(&psxRegs.CP0.r,
-                               sizeof(psxRegs.CP0.r)),
-                       hash_calculate(&psxRegs.CP2D.r,
-                               sizeof(psxRegs.CP2D.r)),
-                       hash_calculate(&psxRegs.CP2C.r,
-                               sizeof(psxRegs.CP2C.r)),
-                       psxRegs.interrupt,
-                       hash_calculate(psxRegs.intCycle,
-                               sizeof(psxRegs.intCycle)),
-                       LE32TOH(HW_GPU_STATUS));
-
-       if (lightrec_very_debug)
-               for (i = 0; i < 34; i++)
-                       printf(" %s 0x%08x", mips_regs[i], psxRegs.GPR.r[i]);
-       else
-               printf(" GPR 0x%08x", hash_calculate(&psxRegs.GPR.r,
-                                       sizeof(psxRegs.GPR.r)));
-       printf("\n");
-}
-
 static void lightrec_dump_regs(struct lightrec_state *state)
 {
        struct lightrec_registers *regs = lightrec_get_registers(state);
@@ -462,22 +452,24 @@ static void lightrec_plugin_execute_block(void)
 
        gen_interupt();
 
+       // step during early boot so that 0x80030000 fastboot hack works
+       if (booting)
+               next_interupt = psxRegs.cycle;
+
        if (use_pcsx_interpreter) {
                intExecuteBlock();
        } else {
                lightrec_reset_cycle_count(lightrec_state, psxRegs.cycle);
                lightrec_restore_regs(lightrec_state);
 
-               if (unlikely(use_lightrec_interpreter))
+               if (unlikely(use_lightrec_interpreter)) {
                        psxRegs.pc = lightrec_run_interpreter(lightrec_state,
-                                                             psxRegs.pc);
-               // step during early boot so that 0x80030000 fastboot hack works
-               else if (unlikely(booting || lightrec_debug))
-                       psxRegs.pc = lightrec_execute_one(lightrec_state,
-                                                         psxRegs.pc);
-               else
+                                                             psxRegs.pc,
+                                                             next_interupt);
+               } else {
                        psxRegs.pc = lightrec_execute(lightrec_state,
                                                      psxRegs.pc, next_interupt);
+               }
 
                psxRegs.cycle = lightrec_current_cycle_count(lightrec_state);
 
@@ -497,10 +489,6 @@ static void lightrec_plugin_execute_block(void)
                        booting = false;
        }
 
-       if (lightrec_debug && psxRegs.cycle >= lightrec_begin_cycles
-                       && psxRegs.pc != old_pc)
-               print_for_big_ass_debugger();
-
        if ((psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x300) &&
                        (psxRegs.CP0.n.Status & 0x1)) {
                /* Handle software interrupts */