[subrepo]
remote = https://github.com/pcercuei/gnu_lightning.git
branch = pcsx_rearmed
- commit = 7fce9abb2a6bfc3967b4e5705794e617ed909402
- parent = 94d482f4b7f5da2c5af7e3590b770261f907f185
+ commit = b1dfc564e2327621d15e688911a398c3a729bd82
+ parent = 7393802c34796806043533cd379e5bcbd66cfd54
method = merge
cmdver = 0.4.3
+2022-09-08 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_fallback.c: Implement fallback compare and swap with
+ pthreads.
+ * check/Makefile.am: Update for new cas{r,i} simple test.
+ * check/catomic.c, check/catomic.ok: New test case for
+ simple compare and swap atomic operation.
+	* check/lightning.c: Add entries to be able to use
+	the new compare and swap atomic operation. A general test is
+	still missing; only the basic C version is exercised.
+	* include/lightning.h.in: Include pthread.h, even when a
+	fallback compare and swap is not needed.
+ * include/lightning/jit_private.h: Add support for a register pair
+ in second argument. Required by the new casr and casi operations.
+ * lib/jit_aarch64-cpu.c, lib/jit_aarch64-sz.c, lib/jit_aarch64.c,
+ lib/jit_ppc-cpu.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_x86-cpu.c,
+ lib/jit_x86-sz.c, lib/jit_x86.c: Implement inline code for compare
+ and swap.
+	* lib/jit_arm-cpu.c, lib/jit_arm-sz.c, lib/jit_arm.c: Implement
+	inline code for compare and swap if the cpu is armv7; otherwise,
+	use a fallback with pthreads.
+ * lib/jit_alpha-cpu.c, lib/jit_alpha-sz.c, lib/jit_alpha.c,
+ lib/jit_hppa-cpu.c, lib/jit_hppa-sz.c, lib/jit_hppa.c,
+ lib/jit_ia64-cpu.c, lib/jit_ia64-sz.c, lib/jit_ia64.c,
+ lib/jit_mips-cpu.c, lib/jit_mips-sz.c, lib/jit_mips.c,
+ lib/jit_riscv-cpu.c, lib/jit_riscv-sz.c, lib/jit_riscv.c,
+ lib/jit_s390-cpu.c, lib/jit_s390-sz.c, lib/jit_s390.c,
+ lib/jit_sparc-cpu.c, lib/jit_sparc-sz.c, lib/jit_sparc.c: Implement
+ fallback compare and swap with pthreads. At least some of these
+ should be updated for inline code generation.
+	* lib/jit_names.c, lib/jit_print.c, lib/lightning.c: Update for
+	the new compare and swap operation.
+	* doc/body.texi: Add simple documentation of the new compare and
+	swap operation.
+
+2022-08-12 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
+
+ Document jit_align.
+ * doc/body.texi: Add documentation for jit_align.
+
2022-05-14 Paulo Andrade <pcpa@gnu.org>
* include/lightning.h.in: Reorder jit_mov{n,z}r in instruction list.
AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
-check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list
+check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
+ catomic
lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
lightning_SOURCES = lightning.c
cva_list_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
cva_list_SOURCES = cva_list.c
+catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB)
+catomic_SOURCES = catomic.c
+
$(top_builddir)/lib/liblightning.la:
cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
TESTS += $(nodata_TESTS)
endif
-TESTS += ccall self setcode nodata ctramp carg cva_list
+TESTS += ccall self setcode nodata ctramp carg cva_list catomic
CLEANFILES = $(TESTS)
#TESTS_ENVIRONMENT=$(srcdir)/run-test;
--- /dev/null
+#include <lightning.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+
+void alarm_handler(int unused)
+{
+ _exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ jit_state_t *_jit;
+ void (*code)(void);
+ jit_node_t *jmpi_main, *label;
+ jit_node_t *func0, *func1, *func2, *func3;
+ jit_node_t *patch0, *patch1, *patch2, *patch3;
+ jit_word_t lock;
+ pthread_t tids[4];
+
+ /* If there is any bug, do not hang in "make check" */
+ signal(SIGALRM, alarm_handler);
+ alarm(5);
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ jmpi_main = jit_jmpi();
+
+#define defun(name, line) \
+ jit_name(#name); \
+ jit_note("catomic.c", line); \
+ name = jit_label(); \
+ jit_prolog(); \
+ jit_movi(JIT_V0, (jit_word_t)&lock); \
+ jit_movi(JIT_R1, 0); \
+ jit_movi(JIT_R2, line); \
+ /* spin until get the lock */ \
+ label = jit_label(); \
+ jit_casr(JIT_R0, JIT_V0, JIT_R1, JIT_R2); \
+ jit_patch_at(jit_beqi(JIT_R0, 0), label); \
+ /* lock acquired */ \
+ jit_prepare(); \
+    /* pretend to be doing something useful for 0.01 sec		\
+     * while holding the lock */					\
+ jit_pushargi(10000); \
+ jit_finishi(usleep); \
+ /* release lock */ \
+ jit_movi(JIT_R1, 0); \
+ jit_str(JIT_V0, JIT_R1); \
+ /* Now test casi */ \
+ jit_movi(JIT_R1, 0); \
+ jit_movi(JIT_R2, line); \
+ /* spin until get the lock */ \
+ label = jit_label(); \
+ jit_casi(JIT_R0, (jit_word_t)&lock, JIT_R1, JIT_R2); \
+ jit_patch_at(jit_beqi(JIT_R0, 0), label); \
+ /* lock acquired */ \
+ jit_prepare(); \
+    /* pretend to be doing something useful for 0.01 sec		\
+     * while holding the lock */					\
+ jit_pushargi(10000); \
+ jit_finishi(usleep); \
+ jit_prepare(); \
+ /* for make check, just print "ok" */ \
+ jit_pushargi((jit_word_t)"ok"); \
+ /*jit_pushargi((jit_word_t)#name);*/ \
+ jit_finishi(puts); \
+ /* release lock */ \
+ jit_movi(JIT_R1, 0); \
+ jit_str(JIT_V0, JIT_R1); \
+ jit_ret(); \
+ jit_epilog();
+ defun(func0, __LINE__);
+ defun(func1, __LINE__);
+ defun(func2, __LINE__);
+ defun(func3, __LINE__);
+
+ jit_patch(jmpi_main);
+ jit_name("main");
+ jit_note("catomic.c", __LINE__);
+ jit_prolog();
+
+#define start(tid) \
+ /* set JIT_R0 to thread function */ \
+ jit_patch_at(jit_movi(JIT_R0, 0), func##tid); \
+ jit_prepare(); \
+ /* pthread_t first argument */ \
+ jit_pushargi((jit_word_t)(tids + tid)); \
+ /* pthread_attr_t second argument */ \
+ jit_pushargi((jit_word_t)NULL); \
+ /* start routine third argument */ \
+ jit_pushargr(JIT_R0); \
+ /* argument to start routine fourth argument */ \
+ jit_pushargi((jit_word_t)NULL); \
+ /* start thread */ \
+ jit_finishi(pthread_create);
+ /* spawn four threads */
+ start(0);
+ start(1);
+ start(2);
+ start(3);
+
+#define join(tid) \
+ /* load pthread_t value in JIT_R0 */ \
+ jit_movi(JIT_R0, (jit_word_t)tids); \
+ jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \
+ jit_prepare(); \
+ jit_pushargr(JIT_R0); \
+ jit_pushargi((jit_word_t)NULL); \
+ jit_finishi(pthread_join);
+ /* wait for threads to finish */
+ join(0);
+ join(1);
+ join(2);
+ join(3);
+
+ jit_prepare();
+ jit_pushargi((jit_word_t)"ok");
+ jit_finishi(puts);
+
+ jit_ret();
+ jit_epilog();
+
+ code = jit_emit();
+
+#if 1
+ jit_disassemble();
+#endif
+
+ jit_clear_state();
+
+ /* let first thread acquire the lock */
+ lock = 0;
+
+ (*code)();
+ jit_destroy_state();
+
+ finish_jit();
+
+ return (0);
+}
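For orientation, each JIT-generated thread function above is morally equivalent
to the plain-C sketch below (illustrative only: the test emits this logic as
lightning instructions, and thread_body plus the legacy GCC __sync builtin are
stand-ins, not part of the test):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static long lock;            /* plays the role of the jit_word_t lock */

    static void *
    thread_body(void *unused)
    {
        /* spin until the CAS flips the lock word from 0 (free) to nonzero */
        while (!__sync_bool_compare_and_swap(&lock, 0, 1))
            ;
        usleep(10000);           /* "work" for 0.01 sec holding the lock */
        puts("ok");
        lock = 0;                /* release, as the jit_str above does */
        return NULL;
    }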
--- /dev/null
+ok
+ok
+ok
+ok
+ok
static void gtr(void); static void gti(void);
static void gtr_u(void); static void gti_u(void);
static void ner(void); static void nei(void);
+static void casr(void); static void casi(void);
static void movr(void); static void movi(void);
static void extr_c(void); static void extr_uc(void);
static void extr_s(void); static void extr_us(void);
entry(gtr), entry(gti),
entry(gtr_u), entry(gti_u),
entry(ner), entry(nei),
+ entry(casr), entry(casi),
entry(movr), entry(movi),
entry(extr_c), entry(extr_uc),
entry(extr_s), entry(extr_us),
jit_word_t im = get_imm(); \
jit_##name(r0, r1, r2, im); \
}
+#define entry_ir_im_ir_ir(name) \
+static void \
+name(void) \
+{ \
+ jit_gpr_t r0 = get_ireg(); \
+ jit_word_t im = get_imm(); \
+ jit_gpr_t r1 = get_ireg(), r2 = get_ireg(); \
+ jit_##name(r0, im, r1, r2); \
+}
+
#define entry_ir_ir(name) \
static void \
name(void) \
entry_ir_ir_ir(gtr) entry_ir_ir_im(gti)
entry_ir_ir_ir(gtr_u) entry_ir_ir_im(gti_u)
entry_ir_ir_ir(ner) entry_ir_ir_im(nei)
+entry_ir_ir_ir_ir(casr) entry_ir_im_ir_ir(casi)
entry_ir_ir(movr)
static void
movi(void)
return 0;
}
)], [ac_cv_test_new_disassembler=no],,)
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ #include <dis-asm.h>
+ int main(int argc, char *argv[])
+ {
+ struct disassemble_info dinfo;
+ INIT_DISASSEMBLE_INFO(dinfo, NULL, NULL, NULL);
+ return 0;
+ }
+ )], [ac_cv_test_new_disassemble_info=yes],[ac_cv_test_new_disassemble_info=no],)
CFLAGS="$save_CFLAGS"
if test "x$ac_cv_test_new_disassembler" != "xno"; then
LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_29=1"
fi
+ if test "x$ac_cv_test_new_disassemble_info" != "xno"; then
+ LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DBINUTILS_2_38=1"
+ fi
fi
AC_ARG_ENABLE(devel-disassembler,
indirect (not specified) @r{special simple label}
@end example
+The following instruction is used to specify a minimal alignment for
+the next instruction, usually with a label:
+@example
+align (not specified) @r{align code}
+@end example
+
@code{label} is normally used as @code{patch_at} argument for backward
jumps.
the @code{movi}, but on some special conditions it is required to create
an "unbound" label.
+@code{align} is useful for creating multiple entry points to a
+(trampoline) function that are all accessible through a single
+function pointer. @code{align} receives an integer argument that
+defines the minimal alignment of the address of a label directly
+following the @code{align} instruction. The integer argument must be
+a power of two and the effective alignment will be a power of two no
+less than the argument to @code{align}. If the argument to
+@code{align} is 16 or more, the effective alignment will match the
+specified minimal alignment exactly.
+
+@example
+ jit_node_t *forward, *label1, *label2, *jump;
+ unsigned char *addr1, *addr2;
+forward = jit_forward();
+ jit_align(16);
+label1 = jit_indirect(); @rem{/* first entry point */}
+jump = jit_jmpi(); @rem{/* jump to first handler */}
+ jit_patch_at(jump, forward);
+ jit_align(16);
+label2 = jit_indirect(); @rem{/* second entry point */}
+ ... @rem{/* second handler */}
+ jit_jmpr(...);
+ jit_link(forward);
+ ...                 @rem{/* first handler */}
+ jit_jmpr(...);
+ ...
+ jit_emit();
+ addr1 = jit_address(label1);
+ addr2 = jit_address(label2);
+ assert(addr2 - addr1 == 16); @rem{/* only one of the addresses needs to be remembered */}
+@end example
+
@item Function prolog
These macros are used to set up a function prolog. The @code{allocai}
@code{pointer_p} expects a pointer argument, and will return non
zero if the pointer is inside the generated jit code. Must be
called after @code{jit_emit} and before @code{jit_destroy_state}.
+
+@item Atomic operations
+Only compare-and-swap is implemented. It accepts four operands;
+the second can be an immediate.
+
+The first argument is set to a boolean value telling whether the
+operation succeeded.
+
+All arguments must be distinct registers; the result register cannot
+also be used to pass an argument.
+
+The second argument is the address of a machine word.
+
+The third argument is the old value.
+
+The fourth argument is the new value.
+
+@example
+casr 0123 = O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0
+casi 0123 = O1 = (*O2 == O3) ? (*O2 = O4, 1) : 0
+@end example
+
+If the value at the address in the second argument is equal to the
+third argument, the value at that address is atomically replaced by the
+fourth argument and the first argument is set to a nonzero value.
+
+If the value at the address in the second argument is not equal to the
+third argument, nothing is done and the first argument is set to zero.
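As an informal illustration (not part of the original manual text), a
spin lock acquire built on @code{casr}, in the style of the
@file{check/catomic.c} test, could be generated as:

@example
 jit_movi(JIT_V0, (jit_word_t)&lock); @rem{/* address of the lock word */}
 jit_movi(JIT_R1, 0);                 @rem{/* expected old value: free */}
 jit_movi(JIT_R2, 1);                 @rem{/* new value: held */}
label = jit_label();
 jit_casr(JIT_R0, JIT_V0, JIT_R1, JIT_R2);
 jit_patch_at(jit_beqi(JIT_R0, 0), label); @rem{/* retry until it succeeds */}
@end example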
@end table
@node GNU lightning examples
#include <stdlib.h>
@MAYBE_INCLUDE_STDINT_H@
#include <string.h>
+#include <pthread.h>
#if defined(__hpux) && defined(__hppa__)
# include <machine/param.h>
#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
#endif
+ jit_code_casr, jit_code_casi,
+#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
jit_code_last_code
} jit_code_t;
extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
jit_int32_t, jit_int32_t,
jit_word_t, jit_word_t);
+#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h)
+extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
+ jit_word_t, jit_word_t,
+ jit_int32_t, jit_int32_t);
#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
jit_word_t, jit_word_t, jit_float32_t);
#define jit_cc_a2_int 0x00100000 /* arg2 is immediate word */
#define jit_cc_a2_flt 0x00200000 /* arg2 is immediate float */
#define jit_cc_a2_dbl 0x00400000 /* arg2 is immediate double */
+#define jit_cc_a2_rlh 0x00800000 /* arg2 is a register pair */
#if __ia64__ || (__sparc__ && __WORDSIZE == 64)
extern void
# define A64_LDRSB 0x38e06800
# define A64_STR 0xf8206800
# define A64_LDR 0xf8606800
+# define A64_LDAXR 0xc85ffc00
+# define A64_STLXR 0xc800fc00
# define A64_STRH 0x78206800
# define A64_LDRH 0x78606800
# define A64_LDRSH 0x78a06800
# define LDR(Rt,Rn,Rm) oxxx(A64_LDR,Rt,Rn,Rm)
# define LDRI(Rt,Rn,Imm12) oxxi(A64_LDRI,Rt,Rn,Imm12)
# define LDUR(Rt,Rn,Imm9) oxx9(A64_LDUR,Rt,Rn,Imm9)
+# define LDAXR(Rt,Rn) o_xx(A64_LDAXR,Rt,Rn)
+# define STLXR(Rs,Rt,Rn) oxxx(A64_STLXR,Rs,Rn,Rt)
# define STRB(Rt,Rn,Rm) oxxx(A64_STRB,Rt,Rn,Rm)
# define STRBI(Rt,Rn,Imm12) oxxi(A64_STRBI,Rt,Rn,Imm12)
# define STURB(Rt,Rn,Imm9) oxx9(A64_STURB,Rt,Rn,Imm9)
# define extr_us(r0,r1) UXTH(r0,r1)
# define extr_i(r0,r1) SXTW(r0,r1)
# define extr_ui(r0,r1) UXTW(r0,r1)
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define movr(r0,r1) _movr(_jit,r0,r1)
static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
# define movi(r0,i0) _movi(_jit,r0,i0)
}
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t r1_reg, iscasi;
+ jit_word_t retry, done, jump0, jump1;
+ if ((iscasi = (r1 == _NOREG))) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+ /* retry: */
+ retry = _jit->pc.w;
+ LDAXR(r0, r1);
+ jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */
+ STLXR(r0, r3, r1);
+ jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
+ /* done: */
+ CSET(r0, CC_EQ);
+ done = _jit->pc.w;
+ patch_at(jump0, done);
+ patch_at(jump1, retry);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+}
+
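The loop above is the standard AArch64 load-exclusive/store-exclusive
retry pattern. As a hedged sketch, the emitted sequence corresponds to
the following GCC inline assembly (an illustration, not code from this
file; cas_word is a hypothetical helper):

    static inline long
    cas_word(volatile long *addr, long expect, long desired)
    {
        long old;
        unsigned int fail = 1;
        __asm__ __volatile__(
            "1:	ldaxr	%0, %2\n"       /* load-acquire exclusive */
            "	cmp	%0, %3\n"
            "	b.ne	2f\n"           /* old value mismatch: give up */
            "	stlxr	%w1, %4, %2\n"  /* store-release exclusive */
            "	cbnz	%w1, 1b\n"      /* reservation lost: retry */
            "2:"
            : "=&r"(old), "+&r"(fail), "+Q"(*addr)
            : "r"(expect), "r"(desired)
            : "cc", "memory");
        return old == expect;           /* mirrors CSET(r0, CC_EQ) */
    }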
static void
_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
8, /* bswapr_us */
8, /* bswapr_ui */
4, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
case_rr(ext, _us);
case_rr(ext, _i);
case_rr(ext, _ui);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rr(mov,);
case_rrr(movn,);
case_rrr(movz,);
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
# define negr(r0,r1) NEGQ(r1,r0)
# define comr(r0,r1) NOT(r1,r0)
# define addr(r0,r1,r2) ADDQ(r1,r2,r0)
patch_at(w, _jit->pc.w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
16, /* bswapr_us */
36, /* bswapr_ui */
36, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#define PROTO 1
# include "jit_alpha-cpu.c"
# include "jit_alpha-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rr(ext, _us);
case_rr(ext, _i);
case_rr(ext, _ui);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
#define CODE 1
# include "jit_alpha-cpu.c"
# include "jit_alpha-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
# define jit_armv5_p() (jit_cpu.version >= 5)
# define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
# define jit_armv6_p() (jit_cpu.version >= 6)
+# define jit_armv7_p() (jit_cpu.version >= 7)
# define jit_armv7r_p() 0
# define stack_framesize 48
extern int __aeabi_idivmod(int, int);
# define ARM_XTR8 0x00000400 /* ?xt? rotate 8 bits */
# define ARM_XTR16 0x00000800 /* ?xt? rotate 16 bits */
# define ARM_XTR24 0x00000c00 /* ?xt? rotate 24 bits */
+# define ARM_LDREX 0x01900090
+# define THUMB2_LDREX 0xe8500000
+# define ARM_STREX 0x01800090
+# define THUMB2_STREX 0xe8400000
/* << ARMv6* */
+/* >> ARMv7 */
+# define ARM_DMB 0xf57ff050
+# define THUMB2_DMB 0xf3bf8f50
+# define DMB_SY 0xf
+# define DMB_ST 0xe
+# define DMB_ISH 0xb
+# define DMB_ISHST 0xa
+# define DMB_NSH 0x7
+# define DMB_NSHT 0x6
+# define DMB_OSH 0x3
+# define DMB_OSHST 0x2
+/* << ARMv7 */
# define ARM_SHIFT 0x01a00000
# define ARM_R 0x00000010 /* register shift */
# define ARM_LSL 0x00000000
static void _tpp(jit_state_t*,int,int);
# define torl(o,rn,im) _torl(_jit,o,rn,im)
static void _torl(jit_state_t*,int,int,int) maybe_unused;
+# define DMB(im) dmb(im)
+# define T2_DMB(im) tdmb(im)
+# define dmb(im) _dmb(_jit, im)
+static void _dmb(jit_state_t *_jit, int im);
+# define tdmb(im) _tdmb(_jit, im)
+static void _tdmb(jit_state_t *_jit, int im);
# define CC_MOV(cc,rd,rm) corrr(cc,ARM_MOV,0,rd,rm)
# define MOV(rd,rm) CC_MOV(ARM_CC_AL,rd,rm)
# define T1_MOV(rd,rm) is(THUMB_MOV|((_u4(rd)&8)<<4)|(_u4(rm)<<3)|(rd&7))
# define CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im)
# define LDRDIN(rt,rn,im) CC_LDRDIN(ARM_CC_AL,rt,rn,im)
# define T2_LDRDIN(rt,rt2,rn,im) torrri8(THUMB2_LDRDI,rn,rt,rt2,im)
+# define CC_LDREX(cc,rt,rn) corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
+# define LDREX(rt,rn) CC_LDREX(ARM_CC_AL,rt,rn)
+# define T2_LDREX(rt,rn,im) torrri8(THUMB2_LDREX,rn,rt,0xf,im)
# define CC_STRB(cc,rt,rn,rm) corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
# define STRB(rt,rn,rm) CC_STRB(ARM_CC_AL,rt,rn,rm)
# define T1_STRB(rt,rn,rm) is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
# define CC_STRDIN(cc,rt,rn,im) corri8(cc,ARM_STRDI,rn,rt,im)
# define STRDIN(rt,rn,im) CC_STRDIN(ARM_CC_AL,rt,rn,im)
# define T2_STRDIN(rt,rt2,rn,im) torrri8(THUMB2_STRDI,rn,rt,rt2,im)
+# define CC_STREX(cc,rd,rt,rn) corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
+# define STREX(rd,rt,rn) CC_STREX(ARM_CC_AL,rd,rt,rn)
+# define T2_STREX(rd,rt,rn,im) torrri8(THUMB2_STREX,rn,rt,rd,im)
# define CC_LDMIA(cc,rn,im) corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
# define LDMIA(rn,im) CC_LDMIA(ARM_CC_AL,rn,im)
# define CC_LDM(cc,rn,im) CC_LDMIA(cc,rn,im)
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define comr(r0,r1) _comr(_jit,r0,r1)
static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
# define negr(r0,r1) _negr(_jit,r0,r1)
iss(thumb.s[0], thumb.s[1]);
}
+static void
+_dmb(jit_state_t *_jit, int im)
+{
+ assert(!(im & 0xfffffff0));
+ ii(ARM_DMB|im);
+}
+
+static void
+_tdmb(jit_state_t *_jit, int im)
+{
+ jit_thumb_t thumb;
+ assert(!(im & 0xfffffff0));
+ thumb.i = THUMB2_DMB | im;
+ iss(thumb.s[0], thumb.s[1]);
+}
+
static void
_nop(jit_state_t *_jit, jit_int32_t i0)
{
_movznr(_jit, ARM_CC_EQ, r0, r1, r2);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t r1_reg, iscasi;
+ jit_word_t retry, done, jump0, jump1;
+ if (!jit_armv7_p())
+ fallback_casx(r0, r1, r2, r3, i0);
+ else {
+ if ((iscasi = (r1 == _NOREG))) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+ if (jit_thumb_p()) {
+ T2_DMB(DMB_ISH);
+ /* retry: */
+ retry = _jit->pc.w;
+ T2_LDREX(r0, r1, 0);
+ jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */
+ T2_STREX(r0, r3, r1, 0);
+ jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
+ /* done: */
+ done = _jit->pc.w;
+ /* r0 = 0 if memory updated, 1 otherwise */
+ xori(r0, r0, 1);
+ T2_DMB(DMB_ISH);
+ }
+ else {
+ DMB(DMB_ISH);
+ /* retry: */
+ retry = _jit->pc.w;
+ LDREX(r0, r1);
+ jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */
+ STREX(r0, r3, r1);
+ jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
+ /* done: */
+ done = _jit->pc.w;
+ /* r0 = 0 if memory updated, 1 otherwise */
+ xori(r0, r0, 1);
+ DMB(DMB_ISH);
+ }
+ patch_at(arm_patch_jump, jump0, done);
+ patch_at(arm_patch_jump, jump1, retry);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+ }
+}
+
static void
_comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
8, /* bswapr_us */
4, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
20, /* bswapr_us */
16, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
# include "jit_arm-cpu.c"
# include "jit_arm-swf.c"
# include "jit_arm-vfp.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rr(ext, _uc);
case_rr(ext, _s);
case_rr(ext, _us);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rr(mov,);
case_rrr(movn,);
case_rrr(movz,);
# include "jit_arm-cpu.c"
# include "jit_arm-swf.c"
# include "jit_arm-vfp.c"
+# include "jit_fallback.c"
#undef CODE
void
static FILE *disasm_stream;
#endif
+#if BINUTILS_2_38
+static int fprintf_styled(void *stream, enum disassembler_style style,
+			  const char *fmt, ...)
+{
+    va_list args;
+    int r;
+
+    va_start(args, fmt);
+    r = vfprintf(stream, fmt, args);
+    va_end(args);
+
+    return r;
+}
+#endif
+
/*
* Implementation
*/
if (!disasm_stream)
disasm_stream = stdout;
+#if BINUTILS_2_38
+ INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf, fprintf_styled);
+#else
INIT_DISASSEMBLE_INFO(disasm_info, disasm_stream, fprintf);
+#endif
disasm_info.arch = bfd_get_arch(disasm_bfd);
disasm_info.mach = bfd_get_mach(disasm_bfd);
--- /dev/null
+#if PROTO
+#define fallback_save(r0) _fallback_save(_jit, r0)
+static void _fallback_save(jit_state_t*, jit_int32_t);
+#define fallback_load(r0) _fallback_load(_jit, r0)
+static void _fallback_load(jit_state_t*, jit_int32_t);
+#define fallback_save_regs(r0) _fallback_save_regs(_jit, r0)
+static void _fallback_save_regs(jit_state_t*, jit_int32_t);
+#define fallback_load_regs(r0) _fallback_load_regs(_jit, r0)
+static void _fallback_load_regs(jit_state_t*, jit_int32_t);
+#define fallback_calli(i0, i1) _fallback_calli(_jit, i0, i1)
+static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t);
+#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im)
+static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
+ jit_int32_t, jit_int32_t, jit_word_t);
+#endif
+
+#if CODE
+static void
+_fallback_save(jit_state_t *_jit, jit_int32_t r0)
+{
+ jit_int32_t offset, regno, spec;
+ for (offset = 0; offset < JIT_R_NUM; offset++) {
+ spec = _rvs[offset].spec;
+ regno = jit_regno(spec);
+ if (regno == r0) {
+ if (!(spec & jit_class_sav))
+ stxi(_jitc->function->regoff[offset], rn(JIT_FP), regno);
+ break;
+ }
+ }
+}
+
+static void
+_fallback_load(jit_state_t *_jit, jit_int32_t r0)
+{
+ jit_int32_t offset, regno, spec;
+ for (offset = 0; offset < JIT_R_NUM; offset++) {
+ spec = _rvs[offset].spec;
+ regno = jit_regno(spec);
+ if (regno == r0) {
+ if (!(spec & jit_class_sav))
+ ldxi(regno, rn(JIT_FP), _jitc->function->regoff[offset]);
+ break;
+ }
+ }
+}
+
+static void
+_fallback_save_regs(jit_state_t *_jit, jit_int32_t r0)
+{
+ jit_int32_t offset, regno, spec;
+ for (offset = 0; offset < JIT_R_NUM; offset++) {
+ regno = JIT_R(offset);
+ spec = _rvs[regno].spec;
+ if ((spec & jit_class_gpr) && regno == r0)
+ continue;
+ if (!(spec & jit_class_sav)) {
+ if (!_jitc->function->regoff[regno]) {
+ _jitc->function->regoff[regno] =
+ jit_allocai(sizeof(jit_word_t));
+ _jitc->again = 1;
+ }
+ jit_regset_setbit(&_jitc->regsav, regno);
+ emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno);
+ }
+ }
+    /* If we knew for certain that float registers are not used by
+     * pthread_mutex_lock and pthread_mutex_unlock, we could skip this */
+ for (offset = 0; offset < JIT_F_NUM; offset++) {
+ regno = JIT_F(offset);
+ spec = _rvs[regno].spec;
+ if (!(spec & jit_class_sav)) {
+ if (!_jitc->function->regoff[regno]) {
+ _jitc->function->regoff[regno] =
+ jit_allocai(sizeof(jit_word_t));
+ _jitc->again = 1;
+ }
+ jit_regset_setbit(&_jitc->regsav, regno);
+ emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno);
+ }
+ }
+}
+
+static void
+_fallback_load_regs(jit_state_t *_jit, jit_int32_t r0)
+{
+ jit_int32_t offset, regno, spec;
+ for (offset = 0; offset < JIT_R_NUM; offset++) {
+ regno = JIT_R(offset);
+ spec = _rvs[regno].spec;
+ if ((spec & jit_class_gpr) && regno == r0)
+ continue;
+ if (!(spec & jit_class_sav)) {
+ jit_regset_clrbit(&_jitc->regsav, regno);
+ emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]);
+ }
+ }
+    /* If we knew for certain that float registers are not used by
+     * pthread_mutex_lock and pthread_mutex_unlock, we could skip this */
+ for (offset = 0; offset < JIT_F_NUM; offset++) {
+ regno = JIT_F(offset);
+ spec = _rvs[regno].spec;
+ if (!(spec & jit_class_sav)) {
+ jit_regset_clrbit(&_jitc->regsav, regno);
+ emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]);
+ }
+ }
+}
+
+static void
+_fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1)
+{
+# if defined(__mips__)
+ movi(rn(_A0), i1);
+# elif defined(__arm__)
+ movi(rn(_R0), i1);
+# elif defined(__sparc__)
+ movi(rn(_O0), i1);
+# elif defined(__ia64__)
+ /* avoid confusion with pushargi patching */
+ if (i1 >= -2097152 && i1 <= 2097151)
+ MOVI(_jitc->rout, i1);
+ else
+ MOVL(_jitc->rout, i1);
+# elif defined(__hppa__)
+ movi(_R26_REGNO, i1);
+# elif defined(__s390__) || defined(__s390x__)
+ movi(rn(_R2), i1);
+# elif defined(__alpha__)
+ movi(rn(_A0), i1);
+# elif defined(__riscv__)
+ movi(rn(JIT_RA0), i1);
+# endif
+ calli(i0);
+}
+
+static void
+_fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t r1_reg, iscasi;
+ jit_word_t jump, done;
+    /* XXX this fallback only synchronizes compare and swap operations
+     * done from lightning jit code */
+ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+ if ((iscasi = r1 == _NOREG)) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+ fallback_save_regs(r0);
+ fallback_calli((jit_word_t)pthread_mutex_lock, (jit_word_t)&mutex);
+ fallback_load(r1);
+ ldr(r0, r1);
+ fallback_load(r2);
+ eqr(r0, r0, r2);
+ fallback_save(r0);
+ jump = bnei(_jit->pc.w, r0, 1);
+ fallback_load(r3);
+# if __WORDSIZE == 32
+ str_i(r1, r3);
+# else
+ str_l(r1, r3);
+# endif
+ /* done: */
+ done = _jit->pc.w;
+ fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
+ fallback_load(r0);
+# if defined(__arm__)
+ patch_at(arm_patch_jump, jump, done);
+# else
+ patch_at(jump, done);
+# endif
+ fallback_load_regs(r0);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+}
+#endif
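In plain C, the sequence _fallback_casx generates is equivalent to the
sketch below (illustrative: the real code emits these steps as JIT
instructions and spills/reloads the scratch registers around the
libpthread calls, and, per the XXX above, it only synchronizes with
other lightning-generated code using the same mutex; fallback_cas is a
hypothetical helper):

    static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

    static jit_word_t
    fallback_cas(jit_word_t *addr, jit_word_t expect, jit_word_t desired)
    {
        jit_word_t success;
        pthread_mutex_lock(&mutex);
        if ((success = (*addr == expect)))
            *addr = desired;
        pthread_mutex_unlock(&mutex);
        return success;              /* what ends up in r0 */
    }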
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
#define comr(r0,r1) UADDCM(_R0_REGNO,r1,r0)
#define negr(r0,r1) SUB(_R0_REGNO,r1,r0)
#define extr_c(r0,r1) EXTRWR(r1,31,8,r0)
patch_at(w, _jit->pc.w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
36, /* bswapr_us */
80, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#define PROTO 1
# include "jit_hppa-cpu.c"
# include "jit_hppa-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rrw(rsh, _u);
case_rrr(movn,);
case_rrr(movz,);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rr(mov,);
case jit_code_movi:
if (node->flag & jit_flag_node) {
#define CODE 1
# include "jit_hppa-cpu.c"
# include "jit_hppa-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
patch_at(w, _jit->pc.w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
48, /* bswapr_us */
48, /* bswapr_ui */
16, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#define PROTO 1
# include "jit_ia64-cpu.c"
# include "jit_ia64-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
#define CODE 1
# include "jit_ia64-cpu.c"
# include "jit_ia64-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
# define movnr(r0,r1,r2) MOVN(r0, r1, r2)
# define movzr(r0,r1,r2) MOVZ(r0, r1, r2)
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define ldr_c(r0,r1) LB(r0,0,r1)
# define ldi_c(r0,i0) _ldi_c(_jit,r0,i0)
static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
return (w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
static void
_calli(jit_state_t *_jit, jit_word_t i0)
{
+ if (((_jit->pc.w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+ if (can_sign_extend_short_p(i0)) {
+ JAL((i0 & ~0xf0000000) >> 2);
+ addiu(_T9_REGNO, _ZERO_REGNO, i0);
+ return;
+ }
+
+ if (can_zero_extend_short_p(i0)) {
+ JAL((i0 & ~0xf0000000) >> 2);
+ ORI(_T9_REGNO, _ZERO_REGNO, i0);
+ return;
+ }
+
+ if (can_sign_extend_int_p(i0)) {
+ if (i0 & 0xffff) {
+ LUI(_T9_REGNO, i0 >> 16);
+ JAL((i0 & ~0xf0000000) >> 2);
+ ORI(_T9_REGNO, _T9_REGNO, i0);
+ } else {
+ JAL((i0 & ~0xf0000000) >> 2);
+ LUI(_T9_REGNO, i0 >> 16);
+ }
+ return;
+ }
+ }
+
movi(_T9_REGNO, i0);
JALR(_T9_REGNO);
NOP(1);
20, /* bswapr_us */
52, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* NEW_ABI */
#endif /* __WORDSIZE */
20, /* bswapr_us */
52, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* NEW_ABI */
#endif /* __WORDSIZE */
20, /* bswapr_us */
52, /* bswapr_ui */
116, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
# include "jit_rewind.c"
# include "jit_mips-cpu.c"
# include "jit_mips-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
jit_inc_synth_w(finishr, r0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
- jit_movr(_T9, r0);
- call = jit_callr(_T9);
+ call = jit_callr(r0);
call->v.w = _jitc->function->self.argi;
#if NEW_ABI
call->w.w = call->v.w;
case_rr(ext, _i);
case_rr(ext, _ui);
#endif
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
# include "jit_rewind.c"
# include "jit_mips-cpu.c"
# include "jit_mips-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
"movr_d_w", "movi_d_w",
"bswapr_us",
"bswapr_ui", "bswapr_ul",
+ "casr", "casi",
};
# define LHAU(d,a,s) FDs(43,d,a,s)
# define LHAUX(d,a,b) FX(31,d,a,b,375)
# define LHAX(d,a,b) FX(31,d,a,b,343)
-# define LHRBX(d,a,b) FX(31,d,a,b,790)
+# define LHBRX(d,a,b) FX(31,d,a,b,790)
# define LHZ(d,a,s) FDs(40,d,a,s)
# define LHZU(d,a,s) FDs(41,d,a,s)
# define LHZUX(d,a,b) FX(31,d,a,b,311)
# define LSWI(d,a,n) FX(31,d,a,n,597)
# define LSWX(d,a,b) FX(31,d,a,b,533)
# define LWARX(d,a,b) FX(31,d,a,b,20)
+# define LDARX(d,a,b) FX(31,d,a,b,84)
# define LWBRX(d,a,b) FX(31,d,a,b,534)
# define LWA(d,a,s) FDs(58,d,a,s|2)
# define LWAUX(d,a,b) FX(31,d,a,b,373)
# define STW(s,a,d) FDs(36,s,a,d)
# define STWBRX(s,a,b) FX(31,s,a,b,662)
# define STWCX_(s,a,b) FX_(31,s,a,b,150)
+# define STDCX_(s,a,b) FX_(31,s,a,b,214)
# define STWU(s,a,d) FDs(37,s,a,d)
# define STWUX(s,a,b) FX(31,s,a,b,183)
# define STWX(s,a,b) FX(31,s,a,b,151)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define negr(r0,r1) NEG(r0,r1)
# define comr(r0,r1) NOT(r0,r1)
# define extr_c(r0,r1) EXTSB(r0,r1)
# define extr_i(r0,r1) EXTSW(r0,r1)
# define extr_ui(r0,r1) CLRLDI(r0,r1,32)
# endif
-# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1)
-static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
-# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1)
-static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_us_lh(r0,r1,no_flag) _bswapr_us(_jit,r0,r1,no_flag)
+# define bswapr_us(r0,r1) _bswapr_us(_jit,r0,r1,0)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
+# define bswapr_ui_lw(r0,r1,no_flag) _bswapr_ui(_jit,r0,r1,no_flag)
+# define bswapr_ui(r0,r1) _bswapr_ui(_jit,r0,r1,0)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_bool_t);
# if __WORDSIZE == 64
# define bswapr_ul(r0,r1) generic_bswapr_ul(_jit,r0,r1)
# endif
}
static void
-_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
{
+ jit_int32_t r1_reg, iscasi;
+ jit_word_t retry, done, jump0, jump1;
+ if ((iscasi = (r1 == _NOREG))) {
+ r1_reg = jit_get_reg(jit_class_gpr);
+ r1 = rn(r1_reg);
+ movi(r1, i0);
+ }
+ SYNC();
+ /* retry: */
+ retry = _jit->pc.w;
+# if __WORDSIZE == 32
+ LWARX(r0, _R0_REGNO, r1);
+# else
+ LDARX(r0, _R0_REGNO, r1);
+# endif
+ jump0 = bner(_jit->pc.w, r0, r2); /* bne done r0 r2 */
+# if __WORDSIZE == 32
+ STWCX_(r3, _R0_REGNO, r1);
+# else
+ STDCX_(r3, _R0_REGNO, r1);
+# endif
+    jump1 = bnei(_jit->pc.w, r0, 0);	/* bnei retry r0 0 */
+ /* done: */
+ done = _jit->pc.w;
+ ISYNC();
+ MFCR(r0);
+ patch_at(jump0, done);
+ patch_at(jump1, retry);
+ if (iscasi)
+ jit_unget_reg(r1_reg);
+}
+
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
+{
+ jit_int32_t reg, addr_reg;
+
+ /* Convert load followed by bswap to a single instruction */
+    /* FIXME r0 and r1 do not need to be the same; it is only necessary
+     * to check whether r1 was loaded by the previous instruction */
+ if (no_flag && r0 == r1) {
+ if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00022e | r0 << 21)) {
+ /* Convert LHZX to LHBRX */
+ _jit->pc.ui--;
+ LHBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f);
+ return;
+ }
+
+ if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0xa0000000 | r0 << 21)) {
+ /* Convert LHZ to LHBRX */
+ _jit->pc.ui--;
+ addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
+
+ reg = jit_get_reg(jit_class_gpr);
+ LI(rn(reg), (short)*_jit->pc.ui);
+ LHBRX(r0, rn(reg), addr_reg);
+ jit_unget_reg(reg);
+ return;
+ }
+ }
+
if (r0 == r1) {
RLWIMI(r0, r0, 16, 8, 15);
RLWINM(r0, r0, 24, 16, 31);
}
static void
-_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
{
- jit_int32_t reg;
+ jit_int32_t reg, addr_reg;
+
+ /* Convert load followed by bswap to a single instruction */
+    /* FIXME r0 and r1 do not need to be the same; it is only necessary
+     * to check whether r1 was loaded by the previous instruction */
+ if (no_flag && r0 == r1) {
+ if ((*(_jit->pc.ui - 1) & 0xffe007ff) == (0x7c00002e | r0 << 21)) {
+ /* Convert LWZX to LWBRX */
+ _jit->pc.ui--;
+ LWBRX(r0, (*_jit->pc.ui >> 16) & 0x1f, (*_jit->pc.ui >> 11) & 0x1f);
+ return;
+ }
+
+ if ((*(_jit->pc.ui - 1) & 0xffe00000) == (0x80000000 | r0 << 21)) {
+ /* Convert LWZ to LWBRX */
+ _jit->pc.ui--;
+ addr_reg = (*_jit->pc.ui >> 16) & 0x1f;
+
+ reg = jit_get_reg(jit_class_gpr);
+ LI(rn(reg), (short)*_jit->pc.ui);
+ LWBRX(r0, rn(reg), addr_reg);
+ jit_unget_reg(reg);
+ return;
+ }
+ }
+
reg = jit_get_reg(jit_class_gpr);
ROTLWI(rn(reg), r1, 8);
RLWIMI(rn(reg), r1, 24, 0, 7);
jit_unget_reg(reg);
}
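The magic masks above decode the previously emitted PowerPC word: the
primary opcode occupies the top 6 bits and RT the next 5, so a single
compare asks "was the last instruction a halfword/word load into r0?".
A minimal sketch of the test for the lhz case (assuming those field
positions; is_lhz_into is a hypothetical helper):

    #include <stdint.h>

    /* lhz has primary opcode 40, and 40 << 26 == 0xa0000000; the mask
     * 0xffe00000 keeps the opcode and RT fields, ignoring RA and the
     * 16-bit displacement. */
    static int
    is_lhz_into(uint32_t insn, int rt)
    {
        return (insn & 0xffe00000u) == (0xa0000000u | (uint32_t)rt << 21);
    }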
+# define is_mask(im) ((im) ? (__builtin_popcountl((im) + (1 << __builtin_ctzl(im))) <= 1) : 0)
+
static void
_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ jit_int32_t reg, offt;
if (can_zero_extend_short_p(i0))
ANDI_(r0, r1, i0);
else if (can_zero_extend_int_p(i0) && !(i0 & 0x0000ffff))
ANDIS_(r0, r1, (jit_uword_t)i0 >> 16);
- else {
+ else if (__WORDSIZE == 32 && is_mask(i0)) {
+ offt = __builtin_ctzl(i0);
+ RLWINM(r0, r1, 0, 32 - offt - __builtin_popcountl(i0), 31 - offt);
+ } else if (__WORDSIZE == 32 && is_mask(~i0)) {
+ offt = __builtin_ctzl(~i0);
+ RLWINM(r0, r1, 0, 32 - offt, 31 - offt - __builtin_popcountl(~i0));
+ } else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
AND(r0, r1, rn(reg));
{
# if _CALL_SYSV
jit_word_t d;
- d = (i0 - _jit->pc.w) & ~3;
- if (can_sign_extend_jump_p(d))
- BL(d);
- else
+ d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
+ if (can_sign_extend_jump_p(d)) {
+	    /* Tell the callee that double arguments were passed in registers. */
+ if (varargs)
+ CREQV(6, 6, 6);
+ BL(d);
+ } else
# endif
{
movi(_R12_REGNO, i0);
# define absr_d(r0,r1) FABS(r0,r1)
# define negr_f(r0,r1) negr_d(r0,r1)
# define negr_d(r0,r1) FNEG(r0,r1)
-# define sqrtr_f(r0,r1) FSQRTS(r0,r1)
-# define sqrtr_d(r0,r1) FSQRT(r0,r1)
+# ifdef _ARCH_PPCSQ
+# define sqrtr_f(r0,r1) FSQRTS(r0,r1)
+# define sqrtr_d(r0,r1) FSQRT(r0,r1)
+# else
+extern float sqrtf(float);
+# define sqrtr_f(r0,r1) _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+extern double sqrt(double);
+# define sqrtr_d(r0,r1) _sqrtr_d(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+# endif
# define addr_f(r0,r1,r2) FADDS(r0,r1,r2)
# define addr_d(r0,r1,r2) FADD(r0,r1,r2)
# define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0)
ldi_d(r0, (jit_word_t)i0);
}
-/* should only work on newer ppc (fcfid is a ppc64 instruction) */
static void
_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
# if __WORDSIZE == 32
- jit_int32_t reg;
+ jit_int32_t reg, freg, off1, off2;
+
+# if __BYTE_ORDER == __BIG_ENDIAN
+ off1 = alloca_offset - 8;
+ off2 = alloca_offset - 4;
+# else
+ off1 = alloca_offset - 4;
+ off2 = alloca_offset - 8;
+# endif
+
reg = jit_get_reg(jit_class_gpr);
- rshi(rn(reg), r1, 31);
- /* use reserved 8 bytes area */
- stxi(alloca_offset - 4, _FP_REGNO, r1);
- stxi(alloca_offset - 8, _FP_REGNO, rn(reg));
+ freg = jit_get_reg(jit_class_fpr);
+
+ movi(rn(reg), 0x43300000);
+ stxi_i(off1, _FP_REGNO, rn(reg));
+ movi(rn(reg), 0x80000000);
+ stxi_i(off2, _FP_REGNO, rn(reg));
+ ldxi_d(rn(freg), _FP_REGNO, alloca_offset - 8);
+ xorr(rn(reg), r1, rn(reg));
+ stxi_i(off2, _FP_REGNO, rn(reg));
+ ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
+ subr_d(r0, r0, rn(freg));
+
jit_unget_reg(reg);
+ jit_unget_reg(freg);
# else
stxi(alloca_offset - 8, _FP_REGNO, r1);
-# endif
ldxi_d(r0, _FP_REGNO, alloca_offset - 8);
FCFID(r0, r0);
+# endif
}
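The 32-bit path above avoids fcfid (a 64-bit PowerPC instruction) with
the classic exponent-bias trick. A sketch of why the constants work
(int_to_double is a hypothetical helper, not part of the file):

    #include <stdint.h>

    static double
    int_to_double(int32_t x)
    {
        union { uint64_t u; double d; } bias, val;
        /* 0x43300000_80000000 read as an IEEE double is exactly 2^52 + 2^31 */
        bias.u = 0x4330000080000000ull;
        /* x ^ 0x80000000 equals x + 2^31 (mod 2^32), so this bit pattern
         * is the double 2^52 + 2^31 + x, exact since x + 2^31 < 2^52 */
        val.u = 0x4330000000000000ull | ((uint32_t)x ^ 0x80000000u);
        return val.d - bias.d;       /* the bias cancels, leaving (double)x */
    }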
static void
}
# endif
+# ifndef _ARCH_PPCSQ
+static void
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr_f(rn(JIT_FA0), r1);
+ calli((jit_word_t)sqrtf
+# if _CALL_SYSV
+ , 0
+# endif
+ );
+ movr_f(r0, rn(JIT_FRET));
+}
+
+static void
+_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ movr_d(rn(JIT_FA0), r1);
+ calli((jit_word_t)sqrt
+# if _CALL_SYSV
+ , 0
+# endif
+ );
+ movr_d(r0, rn(JIT_FRET));
+}
+# endif
+
# define fpr_opi(name, type, size) \
static void \
_##name##i_##type(jit_state_t *_jit, \
20, /* bswapr_us */
16, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* _CALL_SYV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
20, /* bswapr_us */
16, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* _CALL_AIX */
#endif /* __BYTEORDER */
#endif /* __powerpc__ */
20, /* bswapr_us */
16, /* bswapr_ui */
44, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __BYTEORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
20, /* bswapr_us */
16, /* bswapr_ui */
44, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
jit_word_t word;
jit_int32_t value;
jit_int32_t offset;
+ jit_bool_t no_flag = 0; /* Set if previous instruction is
+ * *not* a jump target. */
struct {
jit_node_t *node;
jit_word_t word;
# if __WORDSIZE == 64
case_rr(hton, _ul);
# endif
- case_rr(bswap, _us);
- case_rr(bswap, _ui);
+ case jit_code_bswapr_us:
+ bswapr_us_lh(rn(node->u.w), rn(node->v.w), no_flag);
+ break;
+ case jit_code_bswapr_ui:
+ bswapr_ui_lw(rn(node->u.w), rn(node->v.w), no_flag);
+ break;
# if __WORDSIZE == 64
case_rr(bswap, _ul);
# endif
case_rr(neg,);
case_rr(com,);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
}
}
else
- (void)jmpi_p(node->u.w);
+ jmpi(node->u.w);
break;
case jit_code_callr:
callr(rn(node->u.w)
assert(_jitc->regarg == 0 && _jitc->synth == 0);
/* update register live state */
jit_reglive(node);
+
+ no_flag = !(node->flag & jit_flag_patch);
}
#undef case_brf
#undef case_brw
jit_init_print(void)
{
if (!print_stream)
- print_stream = stderr;
+ print_stream = stdout;
}
void
(jit_cc_a0_int|jit_cc_a0_flt|jit_cc_a0_dbl|jit_cc_a0_jmp|
jit_cc_a0_reg|jit_cc_a0_rlh|jit_cc_a0_arg|
jit_cc_a1_reg|jit_cc_a1_int|jit_cc_a1_flt|jit_cc_a1_dbl|jit_cc_a1_arg|
- jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl);
+ jit_cc_a2_reg|jit_cc_a2_int|jit_cc_a2_flt|jit_cc_a2_dbl|jit_cc_a2_rlh);
if (!(node->flag & jit_flag_synth) && ((value & jit_cc_a0_jmp) ||
node->code == jit_code_finishr ||
node->code == jit_code_finishi))
print_chr(' '); print_reg(node->u.q.h);
print_str(") "); print_reg(node->v.w);
print_chr(' '); print_hex(node->w.w); return;
+ r_r_q:
+ print_chr(' '); print_reg(node->u.w);
+ print_chr(' '); print_reg(node->v.w);
+ print_str(" ("); print_reg(node->w.q.l);
+ print_chr(' '); print_reg(node->w.q.h);
+ print_str(") "); return;
+ r_w_q:
+ print_chr(' '); print_reg(node->u.w);
+ print_chr(' '); print_hex(node->v.w);
+ print_str(" ("); print_reg(node->w.q.l);
+ print_chr(' '); print_reg(node->w.q.h);
+ print_str(") "); return;
r_r_f:
print_chr(' '); print_reg(node->u.w);
print_chr(' '); print_reg(node->v.w);
case jit_cc_a0_reg|jit_cc_a0_rlh|
jit_cc_a1_reg|jit_cc_a2_int:
goto q_r_w;
+ case jit_cc_a0_reg|jit_cc_a1_reg|
+ jit_cc_a2_reg|jit_cc_a2_rlh:
+ goto r_r_q;
+ case jit_cc_a0_reg|jit_cc_a1_int|
+ jit_cc_a2_reg|jit_cc_a2_rlh:
+ goto r_w_q;
case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_flt:
goto r_r_f;
case jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a2_dbl:
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0)	_casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define ltr(r0, r1, r2) SLT(r0, r1, r2)
# define lti(r0, r1, im) _lti(_jit, r0, r1, im)
static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
patch_at(w, _jit->pc.w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
20, /* bswapr_us */
52, /* bswapr_ui */
116, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#define PROTO 1
# include "jit_riscv-cpu.c"
# include "jit_riscv-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rr(ext, _us);
case_rr(ext, _i);
case_rr(ext, _ui);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
#define CODE 1
# include "jit_riscv-cpu.c"
# include "jit_riscv-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
patch_at(w, _jit->pc.w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
52, /* bswapr_us */
128, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
68, /* bswapr_us */
160, /* bswapr_ui */
344, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#define PROTO 1
# include "jit_s390-cpu.c"
# include "jit_s390-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rr(ext, _i);
case_rr(ext, _ui);
#endif
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
#define CODE 1
# include "jit_s390-cpu.c"
# include "jit_s390-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define comr(r0, r1) XNOR(r1, 0, r0)
# define negr(r0, r1) NEG(r1, r0)
# define addr(r0, r1, r2) ADD(r1, r2, r0)
patch_at(w, _jit->pc.w);
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ fallback_casx(r0, r1, r2, r3, i0);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
20, /* bswapr_us */
52, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
20, /* bswapr_us */
52, /* bswapr_ui */
116, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __WORDSIZE */
#define PROTO 1
# include "jit_sparc-cpu.c"
# include "jit_sparc-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
case_rr(ext, _i);
case_rr(ext, _ui);
#endif
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
#define CODE 1
# include "jit_sparc-cpu.c"
# include "jit_sparc-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
static void _movsr(jit_state_t*,jit_int32_t,jit_int32_t);
# define movsr_u(r0, r1) _movsr_u(_jit, r0, r1)
static void _movsr_u(jit_state_t*,jit_int32_t,jit_int32_t);
+# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_word_t);
+#define casr(r0, r1, r2, r3) casx(r0, r1, r2, r3, 0)
+#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
#define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
#define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
mrm(0x03, r7(r0), r7(r1));
}
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+ jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+ jit_int32_t save_rax, restore_rax;
+ jit_int32_t ascasr_reg, ascasr_use;
+ if (r0 != _RAX_REGNO) { /* result not in %rax */
+ if (r2 != _RAX_REGNO) { /* old value not in %rax */
+ save_rax = jit_get_reg(jit_class_gpr);
+ movr(rn(save_rax), _RAX_REGNO);
+ restore_rax = 1;
+ }
+ else
+ restore_rax = 0;
+ }
+ else
+ restore_rax = 0;
+ if (r2 != _RAX_REGNO)
+ movr(_RAX_REGNO, r2);
+ if (r1 == _NOREG) { /* using immediate address */
+ if (!can_sign_extend_int_p(i0)) {
+ ascasr_reg = jit_get_reg(jit_class_gpr);
+ if (ascasr_reg == _RAX) {
+ ascasr_reg = jit_get_reg(jit_class_gpr);
+ jit_unget_reg(_RAX);
+ }
+ ascasr_use = 1;
+ movi(rn(ascasr_reg), i0);
+ }
+ else
+ ascasr_use = 0;
+ }
+ else
+ ascasr_use = 0;
+ ic(0xf0); /* lock */
+ if (ascasr_use)
+ rex(0, WIDE, r3, _NOREG, rn(ascasr_reg));
+ else
+ rex(0, WIDE, r3, _NOREG, r1);
+ ic(0x0f);
+ ic(0xb1);
+ if (r1 != _NOREG) /* casr */
+ rx(r3, 0, r1, _NOREG, _SCL1);
+ else { /* casi */
+ if (ascasr_use)
+ rx(r3, 0, rn(ascasr_reg), _NOREG, _SCL1); /* address in reg */
+ else
+ rx(r3, i0, _NOREG, _NOREG, _SCL1); /* address in offset */
+ }
+ cc(X86_CC_E, r0);
+ if (r0 != _RAX_REGNO)
+ movr(r0, _RAX_REGNO);
+ if (restore_rax) {
+ movr(_RAX_REGNO, rn(save_rax));
+ jit_unget_reg(save_rax);
+ }
+ if (ascasr_use)
+ jit_unget_reg(ascasr_reg);
+}
+
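The 0x0f 0xb1 bytes emitted under the lock prefix are CMPXCHG, whose
register protocol explains the shuffling above: the expected old value
must sit in %rax, success is reported in ZF, and on failure %rax is
overwritten with the observed value. A sketch with GCC inline assembly
(illustrative; assumes GCC 6+ flag-output constraints, and cas_word is
a hypothetical helper):

    static inline long
    cas_word(volatile long *addr, long expect, long desired)
    {
        unsigned char swapped;
        __asm__ __volatile__(
            "lock; cmpxchg %3, %1"    /* if (*addr == %rax) *addr = desired */
            : "=@ccz"(swapped),       /* ZF: 1 if the swap happened */
              "+m"(*addr),
              "+a"(expect)            /* expected old value lives in %rax */
            : "r"(desired)
            : "memory");
        return swapped;               /* mirrors cc(X86_CC_E, r0) */
    }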
static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
7, /* bswapr_us */
4, /* bswapr_ui */
0, /* bswapr_ul */
+ 9, /* casr */
+ 0, /* casi */
#endif
#if __X64
9, /* bswapr_us */
6, /* bswapr_ui */
6, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#else
# if __X64_32
9, /* bswapr_us */
6, /* bswapr_ui */
0, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
# else
#define JIT_INSTR_MAX 115
9, /* bswapr_us */
6, /* bswapr_ui */
6, /* bswapr_ul */
+ 0, /* casr */
+ 0, /* casi */
#endif /* __CYGWIN__ || _WIN32 */
# endif /* __X64_32 */
#endif /* __X64 */
case_rrw(gt, _u);
case_rrr(ne,);
case_rrw(ne,);
+ case jit_code_casr:
+ casr(rn(node->u.w), rn(node->v.w),
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
+ case jit_code_casi:
+ casi(rn(node->u.w), node->v.w,
+ rn(node->w.q.l), rn(node->w.q.h));
+ break;
case_rrr(movn,);
case_rrr(movz,);
case_rr(mov,);
_reverse_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node);
#define redundant_store(node, jump) _redundant_store(_jit, node, jump)
-static void
+static jit_bool_t
_redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump);
#define simplify_movr(p, n, k, s) _simplify_movr(_jit, p, n, k, s)
_simplify_spill(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno);
#define simplify() _simplify(_jit)
-static void
+static jit_bool_t
_simplify(jit_state_t *_jit);
#define jit_reg_undef -1
return (link_node(node));
}
+jit_node_t *
+_jit_new_node_wwq(jit_state_t *_jit, jit_code_t code,
+ jit_word_t u, jit_word_t v,
+ jit_int32_t l, jit_int32_t h)
+{
+ jit_node_t *node = new_node(code);
+ assert(!_jitc->realize);
+ node->u.w = u;
+ node->v.w = v;
+ node->w.q.l = l;
+ node->w.q.h = h;
+ return (link_node(node));
+}
+
jit_node_t *
_jit_new_node_wwf(jit_state_t *_jit, jit_code_t code,
jit_word_t u, jit_word_t v, jit_float32_t w)
case jit_code_movnr: case jit_code_movzr:
mask = jit_cc_a0_reg|jit_cc_a0_cnd|jit_cc_a1_reg|jit_cc_a2_reg;
break;
+ case jit_code_casr:
+ mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_reg|
+ jit_cc_a2_reg|jit_cc_a2_rlh;
+ break;
+ case jit_code_casi:
+ mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_int|
+ jit_cc_a2_reg|jit_cc_a2_rlh;
+ break;
default:
abort();
}
void
_jit_optimize(jit_state_t *_jit)
{
+ jit_int32_t pass;
jit_bool_t jump;
jit_bool_t todo;
jit_int32_t mask;
sequential_labels();
split_branches();
+ pass = 0;
+
+second_pass:
/* create initial mapping of live register values
* at the start of a basic block */
for (offset = 0; offset < _jitc->blocks.offset; offset++) {
}
} while (todo);
- patch_registers();
- simplify();
+ if (pass == 0) {
+ todo = 0;
- /* figure out labels that are only reached with a jump
- * and is required to do a simple redundant_store removal
- * on jit_beqi below */
- jump = 1;
- for (node = _jitc->head; node; node = node->next) {
- switch (node->code) {
- case jit_code_label:
- if (!jump)
- node->flag |= jit_flag_head;
- break;
- case jit_code_jmpi: case jit_code_jmpr:
- case jit_code_epilog:
- jump = 1;
- break;
- case jit_code_data: case jit_code_note:
- break;
- default:
- jump = 0;
- break;
+ patch_registers();
+ if (simplify())
+ todo = 1;
+
+ /* figure out labels that are only reached with a jump
+ * and is required to do a simple redundant_store removal
+ * on jit_beqi below */
+ jump = 1;
+ for (node = _jitc->head; node; node = node->next) {
+ switch (node->code) {
+ case jit_code_label:
+ if (!jump)
+ node->flag |= jit_flag_head;
+ break;
+ case jit_code_jmpi: case jit_code_jmpr:
+ case jit_code_epilog:
+ jump = 1;
+ break;
+ case jit_code_data: case jit_code_note:
+ break;
+ default:
+ jump = 0;
+ break;
+ }
+ }
+
+ for (node = _jitc->head; node; node = node->next) {
+ mask = jit_classify(node->code);
+ if (mask & jit_cc_a0_reg)
+ node->u.w &= ~jit_regno_patch;
+ if (mask & jit_cc_a1_reg)
+ node->v.w &= ~jit_regno_patch;
+ if (mask & jit_cc_a2_reg)
+ node->w.w &= ~jit_regno_patch;
+ if (node->code == jit_code_beqi) {
+ if (redundant_store(node, 1))
+ todo = 1;
+ }
+ else if (node->code == jit_code_bnei) {
+ if (redundant_store(node, 0))
+ todo = 1;
+ }
+ }
+
+ /* If instructions were removed, must recompute state at
+ * start of blocks. */
+ if (todo) {
+ pass = 1;
+ goto second_pass;
}
}
node->v.w &= ~jit_regno_patch;
if (mask & jit_cc_a2_reg)
node->w.w &= ~jit_regno_patch;
- switch (node->code) {
- case jit_code_prolog:
- _jitc->function = _jitc->functions.ptr + node->w.w;
- break;
- case jit_code_epilog:
- _jitc->function = NULL;
- break;
- case jit_code_beqi:
- redundant_store(node, 1);
- break;
- case jit_code_bnei:
- redundant_store(node, 0);
- break;
- default:
+ if (node->code == jit_code_prolog)
+ _jitc->function = _jitc->functions.ptr + node->w.w;
+ else if(node->code == jit_code_epilog)
+ _jitc->function = NULL;
+ else {
#if JIT_HASH_CONSTS
- if (mask & jit_cc_a0_flt) {
- node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
- node->flag |= jit_flag_node | jit_flag_data;
- }
- else if (mask & jit_cc_a0_dbl) {
- node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
- node->flag |= jit_flag_node | jit_flag_data;
- }
- else if (mask & jit_cc_a1_flt) {
- node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
- node->flag |= jit_flag_node | jit_flag_data;
- }
- else if (mask & jit_cc_a1_dbl) {
- node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
- node->flag |= jit_flag_node | jit_flag_data;
- }
- else if (mask & jit_cc_a2_flt) {
- node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
- node->flag |= jit_flag_node | jit_flag_data;
- }
- else if (mask & jit_cc_a2_dbl) {
- node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
- node->flag |= jit_flag_node | jit_flag_data;
- }
+ if (mask & jit_cc_a0_flt) {
+ node->u.p = jit_data(&node->u.f, sizeof(jit_float32_t), 4);
+ node->flag |= jit_flag_node | jit_flag_data;
+ }
+ else if (mask & jit_cc_a0_dbl) {
+ node->u.p = jit_data(&node->u.d, sizeof(jit_float64_t), 8);
+ node->flag |= jit_flag_node | jit_flag_data;
+ }
+ else if (mask & jit_cc_a1_flt) {
+ node->v.p = jit_data(&node->v.f, sizeof(jit_float32_t), 4);
+ node->flag |= jit_flag_node | jit_flag_data;
+ }
+ else if (mask & jit_cc_a1_dbl) {
+ node->v.p = jit_data(&node->v.d, sizeof(jit_float64_t), 8);
+ node->flag |= jit_flag_node | jit_flag_data;
+ }
+ else if (mask & jit_cc_a2_flt) {
+ node->w.p = jit_data(&node->w.f, sizeof(jit_float32_t), 4);
+ node->flag |= jit_flag_node | jit_flag_data;
+ }
+ else if (mask & jit_cc_a2_dbl) {
+ node->w.p = jit_data(&node->w.d, sizeof(jit_float64_t), 8);
+ node->flag |= jit_flag_node | jit_flag_data;
+ }
#endif
- if (_jitc->function) {
- if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
- (jit_cc_a0_reg|jit_cc_a0_chg)) {
- if (mask & jit_cc_a0_rlh) {
- jit_regset_setbit(&_jitc->function->regset,
- jit_regno(node->u.q.l));
- jit_regset_setbit(&_jitc->function->regset,
- jit_regno(node->u.q.h));
- }
- else
- jit_regset_setbit(&_jitc->function->regset,
- jit_regno(node->u.w));
- }
- if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
- (jit_cc_a1_reg|jit_cc_a1_chg))
+ if (_jitc->function) {
+ if ((mask & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
+ (jit_cc_a0_reg|jit_cc_a0_chg)) {
+ if (mask & jit_cc_a0_rlh) {
+ jit_regset_setbit(&_jitc->function->regset,
+ jit_regno(node->u.q.l));
jit_regset_setbit(&_jitc->function->regset,
- jit_regno(node->v.w));
- if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
- (jit_cc_a2_reg|jit_cc_a2_chg))
+ jit_regno(node->u.q.h));
+ }
+ else
jit_regset_setbit(&_jitc->function->regset,
- jit_regno(node->w.w));
+ jit_regno(node->u.w));
}
- break;
+ if ((mask & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
+ (jit_cc_a1_reg|jit_cc_a1_chg))
+ jit_regset_setbit(&_jitc->function->regset,
+ jit_regno(node->v.w));
+ if ((mask & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
+ (jit_cc_a2_reg|jit_cc_a2_chg))
+ jit_regset_setbit(&_jitc->function->regset,
+ jit_regno(node->w.w));
+ }
}
}
}
else
jit_regset_setbit(&_jitc->reglive, node->v.w);
}
- if ((value & jit_cc_a2_reg) && !(node->w.w & jit_regno_patch)) {
- if (value & jit_cc_a2_chg) {
- jit_regset_clrbit(&_jitc->reglive, node->w.w);
- jit_regset_setbit(&_jitc->regmask, node->w.w);
+ if (value & jit_cc_a2_reg) {
+ if (value & jit_cc_a2_rlh) {
+ /* Assume registers are not changed */
+ if (!(node->w.q.l & jit_regno_patch))
+ jit_regset_setbit(&_jitc->reglive, node->w.q.l);
+ if (!(node->w.q.h & jit_regno_patch))
+ jit_regset_setbit(&_jitc->reglive, node->w.q.h);
+ }
+ else {
+ if (!(node->w.w & jit_regno_patch)) {
+ if (value & jit_cc_a2_chg) {
+ jit_regset_clrbit(&_jitc->reglive, node->w.w);
+ jit_regset_setbit(&_jitc->regmask, node->w.w);
+ }
+ else
+ jit_regset_setbit(&_jitc->reglive, node->w.w);
+ }
}
- else
- jit_regset_setbit(&_jitc->reglive, node->w.w);
}
if (jit_regset_set_p(&_jitc->regmask)) {
jit_update(node->next, &_jitc->reglive, &_jitc->regmask);
}
if (value & jit_cc_a1_reg)
jit_regset_setbit(&_jitc->regarg, jit_regno(node->v.w));
- if (value & jit_cc_a2_reg)
- jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+ if (value & jit_cc_a2_reg) {
+ if (value & jit_cc_a2_rlh) {
+ jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.l));
+ jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.q.h));
+ }
+ else
+ jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
+ }
}
void
}
if (value & jit_cc_a1_reg)
jit_regset_clrbit(&_jitc->regarg, jit_regno(node->v.w));
- if (value & jit_cc_a2_reg)
- jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+ if (value & jit_cc_a2_reg) {
+ if (value & jit_cc_a2_rlh) {
+ jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.l));
+ jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.q.h));
+ }
+ else
+ jit_regset_clrbit(&_jitc->regarg, jit_regno(node->w.w));
+ }
}
void
default:
value = jit_classify(node->code);
if (value & jit_cc_a2_reg) {
- if (!(node->w.w & jit_regno_patch)) {
-		if (jit_regset_tstbit(&regmask, node->w.w)) {
-		    jit_regset_clrbit(&regmask, node->w.w);
-		    if (!(value & jit_cc_a2_chg))
-			jit_regset_setbit(&reglive, node->w.w);
+ if (value & jit_cc_a2_rlh) {
+ if (!(node->w.q.l & jit_regno_patch)) {
+ /* Assume register is not changed */
+		    if (jit_regset_tstbit(&regmask, node->w.q.l))
+			jit_regset_clrbit(&regmask, node->w.q.l);
+		}
+		if (!(node->w.q.h & jit_regno_patch)) {
+		    if (jit_regset_tstbit(&regmask, node->w.q.h))
+			jit_regset_clrbit(&regmask, node->w.q.h);
+ }
+ }
+ else {
+ if (value & jit_cc_a2_reg) {
+ if (!(node->w.w & jit_regno_patch)) {
+			if (jit_regset_tstbit(&regmask, node->w.w)) {
+			    jit_regset_clrbit(&regmask, node->w.w);
+			    if (!(value & jit_cc_a2_chg))
+				jit_regset_setbit(&reglive, node->w.w);
+ }
+ }
}
}
}
* means that only JIT_Vn registers can be trusted on
* arrival of jmpr.
*/
+	    jit_regset_set_ui(&regmask, 0);
for (regno = 0; regno < _jitc->reglen; regno++) {
spec = jit_class(_rvs[regno].spec);
-		if (jit_regset_tstbit(&regmask, regno) &&
-		    (spec & (jit_class_gpr|jit_class_fpr)) &&
-		    !(spec & jit_class_sav))
-		    jit_regset_clrbit(&regmask, regno);
+		if ((spec & (jit_class_gpr|jit_class_fpr)) &&
+		    (spec & jit_class_sav))
+		    jit_regset_setbit(&regmask, regno);
}
/* Assume non callee save registers are live due
* to jump to unknown location. */
/* Treat all callee save as live. */
-	    jit_regset_ior(&reglive, &reglive, &regmask);
+	    jit_regset_ior(&block->reglive, &reglive, &regmask);
/* Treat anything else as dead. */
-	    jit_regset_set_ui(&regmask, 0);
+ return;
}
}
break;
default:
value = jit_classify(node->code);
if (value & jit_cc_a2_reg) {
- if (!(node->w.w & jit_regno_patch)) {
- if (jit_regset_tstbit(mask, node->w.w)) {
- jit_regset_clrbit(mask, node->w.w);
- if (!(value & jit_cc_a2_chg))
- jit_regset_setbit(live, node->w.w);
+ if (value & jit_cc_a2_rlh) {
+ if (!(node->w.q.l & jit_regno_patch)) {
+ /* Assume register is not changed */
+ if (jit_regset_tstbit(mask, node->w.q.l))
+ jit_regset_clrbit(mask, node->w.q.l);
+ }
+ if (!(node->w.q.h & jit_regno_patch)) {
+ if (jit_regset_tstbit(mask, node->w.q.h))
+ jit_regset_clrbit(mask, node->w.q.h);
+ }
+ }
+ else {
+ if (!(node->w.w & jit_regno_patch)) {
+ if (jit_regset_tstbit(mask, node->w.w)) {
+ jit_regset_clrbit(mask, node->w.w);
+ if (!(value & jit_cc_a2_chg))
+ jit_regset_setbit(live, node->w.w);
+ }
}
}
}
* means that only JIT_Vn registers can be trusted on
* arrival of jmpr.
*/
+ jit_regset_set_ui(mask, 0);
for (regno = 0; regno < _jitc->reglen; regno++) {
spec = jit_class(_rvs[regno].spec);
- if (jit_regset_tstbit(mask, regno) &&
- (spec & (jit_class_gpr|jit_class_fpr)) &&
- !(spec & jit_class_sav))
- jit_regset_clrbit(mask, regno);
+ if ((spec & (jit_class_gpr|jit_class_fpr)) &&
+ (spec & jit_class_sav))
+ jit_regset_setbit(mask, regno);
}
/* Assume non callee save registers are live due
* to jump to unknown location. */
/* Treat all callee save as live. */
jit_regset_ior(live, live, mask);
/* Treat anything else as dead. */
- jit_regset_set_ui(mask, 0);
+ return;
}
}
break;
return (0);
}
-static void
+static jit_bool_t
_redundant_store(jit_state_t *_jit, jit_node_t *node, jit_bool_t jump)
{
jit_node_t *iter;
jit_word_t word;
jit_int32_t spec;
jit_int32_t regno;
+ jit_bool_t result;
if (jump) {
prev = node->u.n;
if (prev->code == jit_code_epilog)
- return;
+ return (0);
assert(prev->code == jit_code_label);
if ((prev->flag & jit_flag_head) || node->link || prev->link != node)
/* multiple sources */
- return;
+ return (0);
/* if there are sequential labels it will return below */
}
else
prev = node;
+ result = 0;
word = node->w.w;
regno = jit_regno(node->v.w);
for (iter = prev->next; iter; prev = iter, iter = iter->next) {
switch (iter->code) {
case jit_code_label: case jit_code_prolog:
case jit_code_epilog:
- return;
+ return (result);
case jit_code_movi:
if (regno == jit_regno(iter->u.w)) {
if (iter->flag || iter->v.w != word)
- return;
+ return (result);
+ result = 1;
del_node(prev, iter);
iter = prev;
}
default:
spec = jit_classify(iter->code);
if (spec & jit_cc_a0_jmp)
- return;
+ return (result);
if ((spec & (jit_cc_a0_reg|jit_cc_a0_chg)) ==
(jit_cc_a0_reg|jit_cc_a0_chg)) {
if (spec & jit_cc_a0_rlh) {
if (regno == jit_regno(iter->u.q.l) ||
regno == jit_regno(iter->u.q.h))
- return;
+ return (result);
}
else {
if (regno == jit_regno(iter->u.w))
- return;
+ return (result);
}
}
if ((spec & (jit_cc_a1_reg|jit_cc_a1_chg)) ==
(jit_cc_a1_reg|jit_cc_a1_chg)) {
if (regno == jit_regno(iter->v.w))
- return;
+ return (result);
}
if ((spec & (jit_cc_a2_reg|jit_cc_a2_chg)) ==
(jit_cc_a2_reg|jit_cc_a2_chg)) {
if (regno == jit_regno(iter->w.w))
- return;
+ return (result);
}
break;
}
* once to the same value, and is a common pattern of calls
* to jit_pushargi and jit_pushargr
*/
-static void
+static jit_bool_t
_simplify(jit_state_t *_jit)
{
jit_node_t *prev;
jit_node_t *next;
jit_int32_t info;
jit_int32_t regno;
+ jit_bool_t result;
+ result = 0;
for (prev = NULL, node = _jitc->head; node; prev = node, node = next) {
next = node->next;
switch (node->code) {
* already holding */
patch_register(node->link->next, node,
jit_regno_patch|regno, regno);
+ result = 1;
del_node(_jitc->spill[regno], node->link);
del_node(prev, node);
node = prev;
case jit_code_movr:
regno = jit_regno(node->u.w);
if (simplify_movr(prev, node,
- jit_kind_word, sizeof(jit_word_t)))
+ jit_kind_word, sizeof(jit_word_t))) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_movi:
regno = jit_regno(node->u.w);
if (simplify_movi(prev, node,
- jit_kind_word, sizeof(jit_word_t)))
+ jit_kind_word, sizeof(jit_word_t))) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_movr_f:
regno = jit_regno(node->u.w);
if (simplify_movr(prev, node,
- jit_kind_float32, sizeof(jit_float32_t)))
+ jit_kind_float32, sizeof(jit_float32_t))) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_movi_f:
regno = jit_regno(node->u.w);
if (simplify_movi(prev, node,
- jit_kind_float32, sizeof(jit_float32_t)))
+ jit_kind_float32, sizeof(jit_float32_t))) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_movr_d:
regno = jit_regno(node->u.w);
if (simplify_movr(prev, node,
- jit_kind_float64, sizeof(jit_float64_t)))
+ jit_kind_float64, sizeof(jit_float64_t))) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_movi_d:
regno = jit_regno(node->u.w);
if (simplify_movi(prev, node,
- jit_kind_float64, sizeof(jit_float64_t)))
+ jit_kind_float64, sizeof(jit_float64_t))) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_ldxi_c: case jit_code_ldxi_uc:
case jit_code_ldxi_s: case jit_code_ldxi_us:
case jit_code_ldxi_l:
case jit_code_ldxi_f: case jit_code_ldxi_d:
regno = jit_regno(node->u.w);
- if (simplify_ldxi(prev, node))
+ if (simplify_ldxi(prev, node)) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
case jit_code_stxi_c: case jit_code_stxi_s:
case jit_code_stxi_i: case jit_code_stxi_l:
case jit_code_stxi_f: case jit_code_stxi_d:
regno = jit_regno(node->u.w);
- if (simplify_stxi(prev, node))
+ if (simplify_stxi(prev, node)) {
+ result = 1;
simplify_spill(node = prev, regno);
+ }
break;
default:
info = jit_classify(node->code);
++_jitc->gen[regno];
}
if (info & jit_cc_a2_chg) {
- regno = jit_regno(node->w.w);
- _jitc->values[regno].kind = 0;
- ++_jitc->gen[regno];
+#if 0
+ /* Assume registers are not changed */
+ if (info & jit_cc_a2_rlh) {
+ regno = jit_regno(node->w.q.l);
+ _jitc->values[regno].kind = 0;
+ ++_jitc->gen[regno];
+ regno = jit_regno(node->w.q.h);
+ _jitc->values[regno].kind = 0;
+ ++_jitc->gen[regno];
+ }
+ else {
+#endif
+ regno = jit_regno(node->w.w);
+ _jitc->values[regno].kind = 0;
+ ++_jitc->gen[regno];
+#if 0
+ }
+#endif
}
break;
}
}
+ return (result);
}
static jit_int32_t
}
if ((value & jit_cc_a1_reg) && node->v.w == regno)
node->v.w = patch;
- if ((value & jit_cc_a2_reg) && node->w.w == regno)
- node->w.w = patch;
+ if (value & jit_cc_a2_reg) {
+ if (value & jit_cc_a2_rlh) {
+ if (node->w.q.l == regno)
+ node->w.q.l = patch;
+ if (node->w.q.h == regno)
+ node->w.q.h = patch;
+ }
+ else {
+ if (node->w.w == regno)
+ node->w.w = patch;
+ }
+ }
}
}
[subrepo]
remote = https://github.com/pcercuei/lightrec.git
branch = master
- commit = 7545b5a7995be9e7b70e786a6b534004ea26c999
- parent = 2fba93f2853c57240f031adb4712acbd2a066d34
+ commit = e1222761836bb478dcec86cf441dcc5514565137
+ parent = eeff1b0a26e4c7f7449640c0bf999e506f538694
method = merge
cmdver = 0.4.3
* [__pcsx4all__ (my own fork)](https://github.com/pcercuei/pcsx4all)
-* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/)
\ No newline at end of file
+* [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/)
+
+[![Star History Chart](https://api.star-history.com/svg?repos=pcercuei/lightrec&type=Date)](https://star-history.com/#pcercuei/lightrec&Date)
#include "debug.h"
#include "lightrec-private.h"
#include "memmanager.h"
+#include "reaper.h"
+#include "recompiler.h"
#include <stdbool.h>
#include <stdlib.h>
struct block *block, *next;
bool outdated = all;
unsigned int i;
+ u8 old_flags;
for (i = 0; i < LUT_SIZE; i++) {
for (block = cache->lut[i]; block; block = next) {
lightrec_block_is_outdated(state, block);
}
- if (outdated) {
+ if (!outdated)
+ continue;
+
+ old_flags = block_set_flags(block, BLOCK_IS_DEAD);
+
+ if (!(old_flags & BLOCK_IS_DEAD)) {
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_recompiler_remove(state->rec, block);
+
pr_debug("Freeing outdated block at PC 0x%08x\n", block->pc);
remove_from_code_lut(cache, block);
lightrec_unregister_block(cache, block);
return hash;
}
+static void lightrec_reset_lut_offset(struct lightrec_state *state, void *d)
+{
+ u32 pc = (u32)(uintptr_t) d;
+ struct block *block;
+ void *addr;
+
+ block = lightrec_find_block(state->block_cache, pc);
+ if (!block)
+ return;
+
+ if (block_has_flag(block, BLOCK_IS_DEAD))
+ return;
+
+ addr = block->function ?: state->get_next_block;
+ lut_write(state, lut_offset(pc), addr);
+}
+
bool lightrec_block_is_outdated(struct lightrec_state *state, struct block *block)
{
u32 offset = lut_offset(block->pc);
bool outdated;
- void *addr;
if (lut_read(state, offset))
return false;
if (likely(!outdated)) {
/* The block was marked as outdated, but the content is still
* the same */
- if (block->function)
- addr = block->function;
- else
- addr = state->get_next_block;
- lut_write(state, offset, addr);
+ if (ENABLE_THREADED_COMPILER) {
+ /*
+ * When compiling a block that covers ours, the threaded
+ * compiler will set the LUT entries of the various
+ * entry points. Therefore we cannot write the LUT here,
+ * as we would risk overwriting the new entry points.
+ * Leave it to the reaper to re-install the LUT entries.
+ */
+
+ lightrec_reaper_add(state->reaper,
+ lightrec_reset_lut_offset,
+ (void *)(uintptr_t) block->pc);
+ } else if (block->function) {
+ lut_write(state, offset, block->function);
+ } else {
+ lut_write(state, offset, state->get_next_block);
+ }
}
return outdated;
#include "lightrec-private.h"
#include "regcache.h"
-static const char *std_opcodes[] = {
+static const char * const std_opcodes[] = {
[OP_J] = "j ",
[OP_JAL] = "jal ",
[OP_BEQ] = "beq ",
[OP_SWC2] = "swc2 ",
};
-static const char *special_opcodes[] = {
+static const char * const special_opcodes[] = {
[OP_SPECIAL_SLL] = "sll ",
[OP_SPECIAL_SRL] = "srl ",
[OP_SPECIAL_SRA] = "sra ",
[OP_SPECIAL_SLTU] = "sltu ",
};
-static const char *regimm_opcodes[] = {
+static const char * const regimm_opcodes[] = {
[OP_REGIMM_BLTZ] = "bltz ",
[OP_REGIMM_BGEZ] = "bgez ",
[OP_REGIMM_BLTZAL] = "bltzal ",
[OP_REGIMM_BGEZAL] = "bgezal ",
};
-static const char *cp0_opcodes[] = {
+static const char * const cp0_opcodes[] = {
[OP_CP0_MFC0] = "mfc0 ",
[OP_CP0_CFC0] = "cfc0 ",
[OP_CP0_MTC0] = "mtc0 ",
[OP_CP0_RFE] = "rfe",
};
-static const char *cp2_opcodes[] = {
+static const char * const cp2_basic_opcodes[] = {
[OP_CP2_BASIC_MFC2] = "mfc2 ",
[OP_CP2_BASIC_CFC2] = "cfc2 ",
[OP_CP2_BASIC_MTC2] = "mtc2 ",
[OP_CP2_BASIC_CTC2] = "ctc2 ",
};
-static const char *opcode_flags[] = {
+static const char * const cp2_opcodes[] = {
+ [OP_CP2_RTPS] = "rtps ",
+ [OP_CP2_NCLIP] = "nclip ",
+ [OP_CP2_OP] = "op ",
+ [OP_CP2_DPCS] = "dpcs ",
+ [OP_CP2_INTPL] = "intpl ",
+ [OP_CP2_MVMVA] = "mvmva ",
+ [OP_CP2_NCDS] = "ncds ",
+ [OP_CP2_CDP] = "cdp ",
+ [OP_CP2_NCDT] = "ncdt ",
+ [OP_CP2_NCCS] = "nccs ",
+ [OP_CP2_CC] = "cc ",
+ [OP_CP2_NCS] = "ncs ",
+ [OP_CP2_NCT] = "nct ",
+ [OP_CP2_SQR] = "sqr ",
+ [OP_CP2_DCPL] = "dcpl ",
+ [OP_CP2_DPCT] = "dpct ",
+ [OP_CP2_AVSZ3] = "avsz3 ",
+ [OP_CP2_AVSZ4] = "avsz4 ",
+ [OP_CP2_RTPT] = "rtpt ",
+ [OP_CP2_GPF] = "gpf ",
+ [OP_CP2_GPL] = "gpl ",
+ [OP_CP2_NCCT] = "ncct ",
+};
+
+static const char * const mult2_opcodes[] = {
+ "mult2 ", "multu2 ",
+};
+
+static const char * const opcode_flags[] = {
"switched branch/DS",
"sync point",
};
-static const char *opcode_io_flags[] = {
+static const char * const opcode_io_flags[] = {
"self-modifying code",
"no invalidation",
"no mask",
};
-static const char *opcode_io_modes[] = {
+static const char * const opcode_io_modes[] = {
"Memory access",
"I/O access",
"RAM access",
"BIOS access",
"Scratchpad access",
+ "Mapped I/O access"
};
-static const char *opcode_branch_flags[] = {
+static const char * const opcode_branch_flags[] = {
"emulate branch",
"local branch",
};
-static const char *opcode_multdiv_flags[] = {
+static const char * const opcode_multdiv_flags[] = {
"No LO",
"No HI",
"No div check",
};
static int print_flags(char *buf, size_t len, const struct opcode *op,
- const char **array, size_t array_size,
+ const char * const *array, size_t array_size,
bool is_io)
{
const char *flag_name, *io_mode_name;
}
static int print_op_special(union code c, char *buf, size_t len,
- const char ***flags_ptr, size_t *nb_flags)
+ const char * const **flags_ptr, size_t *nb_flags)
{
switch (c.r.op) {
case OP_SPECIAL_SLL:
static int print_op_cp(union code c, char *buf, size_t len, unsigned int cp)
{
if (cp == 2) {
- switch (c.i.rs) {
- case OP_CP0_MFC0:
- case OP_CP0_CFC0:
- case OP_CP0_MTC0:
- case OP_CP0_CTC0:
+ switch (c.r.op) {
+ case OP_CP2_BASIC:
return snprintf(buf, len, "%s%s,%u",
- cp2_opcodes[c.i.rs],
+ cp2_basic_opcodes[c.i.rs],
lightrec_reg_name(c.i.rt),
c.r.rd);
default:
- return snprintf(buf, len, "cp2 (0x%08x)", c.opcode);
+ return snprintf(buf, len, "%s", cp2_opcodes[c.r.op]);
}
} else {
switch (c.i.rs) {
}
static int print_op(union code c, u32 pc, char *buf, size_t len,
- const char ***flags_ptr, size_t *nb_flags,
+ const char * const **flags_ptr, size_t *nb_flags,
bool *is_io)
{
if (c.opcode == 0)
return snprintf(buf, len, "exts %s,%s",
lightrec_reg_name(c.i.rt),
lightrec_reg_name(c.i.rs));
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
+ *flags_ptr = opcode_multdiv_flags;
+ *nb_flags = ARRAY_SIZE(opcode_multdiv_flags);
+ return snprintf(buf, len, "%s%s,%s,%s,%u",
+ mult2_opcodes[c.i.op == OP_META_MULTU2],
+ lightrec_reg_name(get_mult_div_hi(c)),
+ lightrec_reg_name(get_mult_div_lo(c)),
+ lightrec_reg_name(c.r.rs), c.r.op);
default:
return snprintf(buf, len, "unknown (0x%08x)", c.opcode);
}
void lightrec_print_disassembly(const struct block *block, const u32 *code_ptr)
{
const struct opcode *op;
- const char **flags_ptr;
+ const char * const *flags_ptr;
size_t nb_flags, count, count2;
char buf[256], buf2[256], buf3[256];
unsigned int i;
#define LIGHTREC_IO_RAM 0x3
#define LIGHTREC_IO_BIOS 0x4
#define LIGHTREC_IO_SCRATCH 0x5
+#define LIGHTREC_IO_DIRECT_HW 0x6
#define LIGHTREC_IO_MASK LIGHTREC_IO_MODE(0x7)
#define LIGHTREC_FLAGS_GET_IO_MODE(x) \
(((x) & LIGHTREC_IO_MASK) >> LIGHTREC_IO_MODE_LSB)
OP_META_EXTC = 0x17,
OP_META_EXTS = 0x18,
+
+ OP_META_MULT2 = 0x19,
+ OP_META_MULTU2 = 0x1a,
};
enum special_opcodes {
enum cp2_opcodes {
OP_CP2_BASIC = 0x00,
+ OP_CP2_RTPS = 0x01,
+ OP_CP2_NCLIP = 0x06,
+ OP_CP2_OP = 0x0c,
+ OP_CP2_DPCS = 0x10,
+ OP_CP2_INTPL = 0x11,
+ OP_CP2_MVMVA = 0x12,
+ OP_CP2_NCDS = 0x13,
+ OP_CP2_CDP = 0x14,
+ OP_CP2_NCDT = 0x16,
+ OP_CP2_NCCS = 0x1b,
+ OP_CP2_CC = 0x1c,
+ OP_CP2_NCS = 0x1e,
+ OP_CP2_NCT = 0x20,
+ OP_CP2_SQR = 0x28,
+ OP_CP2_DCPL = 0x29,
+ OP_CP2_DPCT = 0x2a,
+ OP_CP2_AVSZ3 = 0x2d,
+ OP_CP2_AVSZ4 = 0x2e,
+ OP_CP2_RTPT = 0x30,
+ OP_CP2_GPF = 0x3d,
+ OP_CP2_GPL = 0x3e,
+ OP_CP2_NCCT = 0x3f,
};
enum cp2_basic_opcodes {
u32 flags;
};
+struct opcode_list {
+ u16 nb_ops;
+ struct opcode ops[];
+};
+
void lightrec_print_disassembly(const struct block *block, const u32 *code);
static inline _Bool op_flag_no_ds(u32 flags)
block->pc + (offset << 2));
}
+static void
+lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit)
+{
+ /* Prevent jit_jmpi() from using our cycles register as a temporary */
+ jit_live(LIGHTREC_REG_CYCLE);
+
+ jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func);
+}
+
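jit_live() marks the given register as live at that point in the instruction stream, so Lightning will not pick LIGHTREC_REG_CYCLE as a temporary when materializing the target address for jit_jmpi(); without it, the cycle counter could be clobbered right before entering the end-of-block wrapper.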
static void lightrec_emit_end_of_block(struct lightrec_cstate *state,
const struct block *block, u16 offset,
s8 reg_new_pc, u32 imm, u8 ra_reg,
const struct opcode *op = &block->opcode_list[offset],
*next = &block->opcode_list[offset + 1];
u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c);
- u16 offset_after_eob;
jit_note(__FILE__, __LINE__);
pr_debug("EOB: %u cycles\n", cycles);
}
- offset_after_eob = offset + 1 +
- (has_delay_slot(op->c) && !op_flag_no_ds(op->flags));
-
- if (offset_after_eob < block->nb_ops)
- state->branches[state->nb_branches++] = jit_b();
+ lightrec_jump_to_eob(state, _jit);
}
void lightrec_emit_eob(struct lightrec_cstate *state, const struct block *block,
jit_movi(JIT_V0, block->pc + (offset << 2));
jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, cycles);
- state->branches[state->nb_branches++] = jit_b();
+ lightrec_jump_to_eob(state, _jit);
}
static u8 get_jr_jalr_reg(struct lightrec_cstate *state, const struct block *block, u16 offset)
}
static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 offset,
- jit_code_t code, u32 link, bool unconditional, bool bz)
+ jit_code_t code, jit_code_t code2, u32 link, bool unconditional, bool bz)
{
struct regcache *reg_cache = state->reg_cache;
struct native_register *regs_backup;
bool is_forward = (s16)op->i.imm >= -1;
int op_cycles = lightrec_cycles_of_opcode(op->c);
u32 target_offset, cycles = state->cycles + op_cycles;
+ bool no_indirection = false;
u32 next_pc;
jit_note(__FILE__, __LINE__);
/* Unload dead registers before evaluating the branch */
if (OPT_EARLY_UNLOAD)
lightrec_do_early_unload(state, block, offset);
+
+ if (op_flag_local_branch(op->flags) &&
+ (op_flag_no_ds(op->flags) || !next->opcode) &&
+ is_forward && !lightrec_has_dirty_regs(reg_cache))
+ no_indirection = true;
+
+ if (no_indirection)
+		pr_debug("Using no indirection for branch at offset 0x%x\n", offset << 2);
}
if (cycles)
if (!unconditional) {
/* Generate the branch opcode */
- addr = jit_new_node_pww(code, NULL, rs, rt);
+ if (!no_indirection)
+ addr = jit_new_node_pww(code, NULL, rs, rt);
lightrec_free_regs(reg_cache);
regs_backup = lightrec_regcache_enter_branch(reg_cache);
state->nb_local_branches++];
branch->target = target_offset;
- if (is_forward)
+
+ if (no_indirection)
+ branch->branch = jit_new_node_pww(code2, NULL, rs, rt);
+ else if (is_forward)
branch->branch = jit_b();
else
branch->branch = jit_bgti(LIGHTREC_REG_CYCLE, 0);
}
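Note how the two branch codes passed to rec_b() relate: code is the inverted MIPS condition, used to branch over the branch-taken path when an indirect jump has to be emitted, while code2 is the original condition, used on the new no_indirection fast path to branch straight to the local target. The call sites below show the pairing; rec_BEQ(), for instance, passes code = jit_code_bner and code2 = jit_code_beqr.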
if (!unconditional) {
- jit_patch(addr);
+ if (!no_indirection)
+ jit_patch(addr);
+
lightrec_regcache_leave_branch(reg_cache, regs_backup);
if (bz && link) {
_jit_name(block->_jit, __func__);
if (c.i.rt == 0)
- rec_b(state, block, offset, jit_code_beqi, 0, false, true);
+ rec_b(state, block, offset, jit_code_beqi, jit_code_bnei, 0, false, true);
else
- rec_b(state, block, offset, jit_code_beqr, 0, false, false);
+ rec_b(state, block, offset, jit_code_beqr, jit_code_bner, 0, false, false);
}
static void rec_BEQ(struct lightrec_cstate *state,
_jit_name(block->_jit, __func__);
if (c.i.rt == 0)
- rec_b(state, block, offset, jit_code_bnei, 0, c.i.rs == 0, true);
+ rec_b(state, block, offset, jit_code_bnei, jit_code_beqi, 0, c.i.rs == 0, true);
else
- rec_b(state, block, offset, jit_code_bner, 0, c.i.rs == c.i.rt, false);
+ rec_b(state, block, offset, jit_code_bner, jit_code_beqr, 0, c.i.rs == c.i.rt, false);
}
static void rec_BLEZ(struct lightrec_cstate *state,
union code c = block->opcode_list[offset].c;
_jit_name(block->_jit, __func__);
- rec_b(state, block, offset, jit_code_bgti, 0, c.i.rs == 0, true);
+ rec_b(state, block, offset, jit_code_bgti, jit_code_blei, 0, c.i.rs == 0, true);
}
static void rec_BGTZ(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_b(state, block, offset, jit_code_blei, 0, false, true);
+ rec_b(state, block, offset, jit_code_blei, jit_code_bgti, 0, false, true);
}
static void rec_regimm_BLTZ(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_b(state, block, offset, jit_code_bgei, 0, false, true);
+ rec_b(state, block, offset, jit_code_bgei, jit_code_blti, 0, false, true);
}
static void rec_regimm_BLTZAL(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_b(state, block, offset, jit_code_bgei,
+ rec_b(state, block, offset, jit_code_bgei, jit_code_blti,
get_branch_pc(block, offset, 2), false, true);
}
union code c = block->opcode_list[offset].c;
_jit_name(block->_jit, __func__);
- rec_b(state, block, offset, jit_code_blti, 0, !c.i.rs, true);
+ rec_b(state, block, offset, jit_code_blti, jit_code_bgei, 0, !c.i.rs, true);
}
static void rec_regimm_BGEZAL(struct lightrec_cstate *state,
{
const struct opcode *op = &block->opcode_list[offset];
_jit_name(block->_jit, __func__);
- rec_b(state, block, offset, jit_code_blti,
+ rec_b(state, block, offset, jit_code_blti, jit_code_bgei,
get_branch_pc(block, offset, 2),
!op->i.rs, true);
}
/* E(rd) = (E(rs) & E(rt)) | (E(rt) & !Z(rt)) | (E(rs) & !Z(rs)) */
if ((REG_EXT & flags_rs & flags_rt) ||
- (flags_rt & (REG_EXT | REG_ZEXT) == REG_EXT) ||
- (flags_rs & (REG_EXT | REG_ZEXT) == REG_EXT))
+ ((flags_rt & (REG_EXT | REG_ZEXT)) == REG_EXT) ||
+ ((flags_rs & (REG_EXT | REG_ZEXT)) == REG_EXT))
flags_rd |= REG_EXT;
lightrec_set_reg_out_flags(reg_cache, rd, flags_rd);
rec_alu_mv_lo_hi(state, block, REG_LO, c.r.rs);
}
-static void call_to_c_wrapper(struct lightrec_cstate *state, const struct block *block,
- u32 arg, bool with_arg, enum c_wrappers wrapper)
+static void call_to_c_wrapper(struct lightrec_cstate *state,
+ const struct block *block, u32 arg,
+ enum c_wrappers wrapper)
{
struct regcache *reg_cache = state->reg_cache;
jit_state_t *_jit = block->_jit;
- u8 tmp;
+ s8 tmp, tmp2;
- tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
- jit_ldxi(tmp, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, wrappers_eps[wrapper]));
+ /* Make sure JIT_R1 is not mapped; it will be used in the C wrapper. */
+ tmp2 = lightrec_alloc_reg(reg_cache, _jit, JIT_R1);
- if (with_arg) {
- jit_prepare();
- jit_pushargi(arg);
+ tmp = lightrec_get_reg_with_value(reg_cache,
+ (intptr_t) state->state->wrappers_eps[wrapper]);
+ if (tmp < 0) {
+ tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+ jit_ldxi(tmp, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, wrappers_eps[wrapper]));
+
+ lightrec_temp_set_value(reg_cache, tmp,
+ (intptr_t) state->state->wrappers_eps[wrapper]);
}
+ lightrec_free_reg(reg_cache, tmp2);
+
+#ifdef __mips__
+ /* On MIPS, register t9 is always used as the target register for JALR.
+ * Therefore if it does not contain the target address we must
+ * invalidate it. */
+ if (tmp != _T9)
+ lightrec_unload_reg(reg_cache, _jit, _T9);
+#endif
+
+ jit_prepare();
+ jit_pushargi(arg);
+
lightrec_regcache_mark_live(reg_cache, _jit);
jit_callr(tmp);
lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
if (is_tagged) {
- call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_RW);
+ call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW);
} else {
lut_entry = lightrec_get_lut_entry(block);
call_to_c_wrapper(state, block, (lut_entry << 16) | offset,
- true, C_WRAPPER_RW_GENERIC);
+ C_WRAPPER_RW_GENERIC);
}
}
bool add_imm = c.i.imm &&
((!state->mirrors_mapped && !no_mask) || (invalidate &&
((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt))));
- bool need_tmp = !no_mask || addr_offset || add_imm;
- bool need_tmp2 = addr_offset || invalidate;
+ bool need_tmp = !no_mask || addr_offset || add_imm || invalidate;
rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
addr_reg = tmp;
}
- if (need_tmp2)
- tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
-
if (addr_offset) {
+ tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
jit_addi(tmp2, addr_reg, addr_offset);
addr_reg2 = tmp2;
} else {
tmp3 = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0);
if (c.i.op != OP_SW) {
- jit_andi(tmp2, addr_reg, ~3);
- addr_reg = tmp2;
+ jit_andi(tmp, addr_reg, ~3);
+ addr_reg = tmp;
}
if (!lut_is_32bit(state)) {
- jit_lshi(tmp2, addr_reg, 1);
- addr_reg = tmp2;
+ jit_lshi(tmp, addr_reg, 1);
+ addr_reg = tmp;
}
if (addr_reg == rs && c.i.rs == 0) {
addr_reg = LIGHTREC_REG_STATE;
} else {
- jit_addr(tmp2, addr_reg, LIGHTREC_REG_STATE);
- addr_reg = tmp2;
+ jit_addr(tmp, addr_reg, LIGHTREC_REG_STATE);
+ addr_reg = tmp;
}
if (lut_is_32bit(state))
lightrec_free_reg(reg_cache, tmp3);
}
- if (need_tmp2)
+ if (addr_offset)
lightrec_free_reg(reg_cache, tmp2);
if (need_tmp)
lightrec_free_reg(reg_cache, tmp);
0x1fffffff, false);
}
+static void rec_store_io(struct lightrec_cstate *cstate,
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code)
+{
+ _jit_note(block->_jit, __FILE__, __LINE__);
+
+ return rec_store_memory(cstate, block, offset, code, swap_code,
+ cstate->state->offset_io,
+ 0x1fffffff, false);
+}
+
static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate,
const struct block *block,
u16 offset, jit_code_t code,
jit_note(__FILE__, __LINE__);
rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0);
- rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
if (state->offset_ram || state->offset_scratch)
lightrec_free_reg(reg_cache, tmp2);
}
+ rt = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rt, 0);
+
if (is_big_endian() && swap_code && c.i.rt) {
tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit);
rec_store_direct(state, block, offset, code, swap_code);
}
break;
+ case LIGHTREC_IO_DIRECT_HW:
+ rec_store_io(state, block, offset, code, swap_code);
+ break;
default:
rec_io(state, block, offset, true, false);
break;
cstate->state->offset_scratch, 0x1fffffff);
}
+static void rec_load_io(struct lightrec_cstate *cstate,
+ const struct block *block, u16 offset,
+ jit_code_t code, jit_code_t swap_code, bool is_unsigned)
+{
+ _jit_note(block->_jit, __FILE__, __LINE__);
+
+ rec_load_memory(cstate, block, offset, code, swap_code, is_unsigned,
+ cstate->state->offset_io, 0x1fffffff);
+}
+
static void rec_load_direct(struct lightrec_cstate *cstate,
const struct block *block, u16 offset,
jit_code_t code, jit_code_t swap_code,
case LIGHTREC_IO_SCRATCH:
rec_load_scratch(state, block, offset, code, swap_code, is_unsigned);
break;
+ case LIGHTREC_IO_DIRECT_HW:
+ rec_load_io(state, block, offset, code, swap_code, is_unsigned);
+ break;
case LIGHTREC_IO_DIRECT:
rec_load_direct(state, block, offset, code, swap_code, is_unsigned);
break;
static void rec_LH(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
+ jit_code_t code = is_big_endian() ? jit_code_ldxi_us : jit_code_ldxi_s;
+
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_s, jit_code_bswapr_us, false);
+ rec_load(state, block, offset, code, jit_code_bswapr_us, false);
}
static void rec_LHU(struct lightrec_cstate *state, const struct block *block, u16 offset)
static void rec_LW(struct lightrec_cstate *state, const struct block *block, u16 offset)
{
+ jit_code_t code;
+
+ if (is_big_endian() && __WORDSIZE == 64)
+ code = jit_code_ldxi_ui;
+ else
+ code = jit_code_ldxi_i;
+
_jit_name(block->_jit, __func__);
- rec_load(state, block, offset, jit_code_ldxi_i, jit_code_bswapr_ui, false);
+ rec_load(state, block, offset, code, jit_code_bswapr_ui, false);
}
static void rec_LWC2(struct lightrec_cstate *state, const struct block *block, u16 offset)
}
static void rec_break_syscall(struct lightrec_cstate *state,
- const struct block *block, u16 offset, bool is_break)
+ const struct block *block, u16 offset,
+ u32 exit_code)
{
+ struct regcache *reg_cache = state->reg_cache;
+ jit_state_t *_jit = block->_jit;
+ u8 tmp;
+
_jit_note(block->_jit, __FILE__, __LINE__);
- if (is_break)
- call_to_c_wrapper(state, block, 0, false, C_WRAPPER_BREAK);
- else
- call_to_c_wrapper(state, block, 0, false, C_WRAPPER_SYSCALL);
+ tmp = lightrec_alloc_reg_temp(reg_cache, _jit);
+
+ jit_movi(tmp, exit_code);
+ jit_stxi_i(offsetof(struct lightrec_state, exit_flags),
+ LIGHTREC_REG_STATE, tmp);
+
+ lightrec_free_reg(reg_cache, tmp);
/* TODO: the return address should be "pc - 4" if we're a delay slot */
lightrec_emit_end_of_block(state, block, offset, -1,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_break_syscall(state, block, offset, false);
+ rec_break_syscall(state, block, offset, LIGHTREC_EXIT_SYSCALL);
}
static void rec_special_BREAK(struct lightrec_cstate *state,
const struct block *block, u16 offset)
{
_jit_name(block->_jit, __func__);
- rec_break_syscall(state, block, offset, true);
+ rec_break_syscall(state, block, offset, LIGHTREC_EXIT_BREAK);
}
static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u16 offset)
lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false);
lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false);
- call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_MTC);
+ call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC);
if (c.i.op == OP_CP0 &&
!op_flag_no_ds(block->opcode_list[offset].flags) &&
jit_name(__func__);
jit_note(__FILE__, __LINE__);
- call_to_c_wrapper(state, block, c.opcode, true, C_WRAPPER_CP);
+ call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_CP);
}
static void rec_meta_MOV(struct lightrec_cstate *state,
lightrec_free_reg(reg_cache, rt);
}
+static void rec_meta_MULT2(struct lightrec_cstate *state,
+ const struct block *block,
+ u16 offset)
+{
+ struct regcache *reg_cache = state->reg_cache;
+ union code c = block->opcode_list[offset].c;
+ jit_state_t *_jit = block->_jit;
+ u8 reg_lo = get_mult_div_lo(c);
+ u8 reg_hi = get_mult_div_hi(c);
+ u32 flags = block->opcode_list[offset].flags;
+ bool is_signed = c.i.op == OP_META_MULT2;
+ u8 rs, lo, hi, rflags = 0, hiflags = 0;
+
+ if (!op_flag_no_hi(flags) && c.r.op < 32) {
+ rflags = is_signed ? REG_EXT : REG_ZEXT;
+ hiflags = is_signed ? REG_EXT : (REG_EXT | REG_ZEXT);
+ }
+
+ _jit_name(block->_jit, __func__);
+ jit_note(__FILE__, __LINE__);
+
+ rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, rflags);
+
+ if (!op_flag_no_lo(flags)) {
+ lo = lightrec_alloc_reg_out(reg_cache, _jit, reg_lo, 0);
+
+ if (c.r.op < 32)
+ jit_lshi(lo, rs, c.r.op);
+ else
+ jit_movi(lo, 0);
+
+ lightrec_free_reg(reg_cache, lo);
+ }
+
+ if (!op_flag_no_hi(flags)) {
+ hi = lightrec_alloc_reg_out(reg_cache, _jit, reg_hi, hiflags);
+
+ if (c.r.op >= 32)
+ jit_lshi(hi, rs, c.r.op - 32);
+ else if (is_signed)
+ jit_rshi(hi, rs, 32 - c.r.op);
+ else
+ jit_rshi_u(hi, rs, 32 - c.r.op);
+
+ lightrec_free_reg(reg_cache, hi);
+ }
+
+ lightrec_free_reg(reg_cache, rs);
+}
+
static const lightrec_rec_func_t rec_standard[64] = {
SET_DEFAULT_ELM(rec_standard, unknown_opcode),
[OP_SPECIAL] = rec_SPECIAL,
[OP_META_MOV] = rec_meta_MOV,
[OP_META_EXTC] = rec_meta_EXTC_EXTS,
[OP_META_EXTS] = rec_meta_EXTC_EXTS,
+ [OP_META_MULT2] = rec_meta_MULT2,
+ [OP_META_MULTU2] = rec_meta_MULT2,
};
static const lightrec_rec_func_t rec_special[64] = {
return jump_next(inter);
}
+static u32 int_META_MULT2(struct interpreter *inter)
+{
+ u32 *reg_cache = inter->state->regs.gpr;
+ union code c = inter->op->c;
+ u32 rs = reg_cache[c.r.rs];
+ u8 reg_lo = get_mult_div_lo(c);
+ u8 reg_hi = get_mult_div_hi(c);
+
+ if (!op_flag_no_lo(inter->op->flags)) {
+ if (c.r.op < 32)
+ reg_cache[reg_lo] = rs << c.r.op;
+ else
+ reg_cache[reg_lo] = 0;
+ }
+
+ if (!op_flag_no_hi(inter->op->flags)) {
+ if (c.r.op >= 32)
+ reg_cache[reg_hi] = rs << (c.r.op - 32);
+ else if (c.i.op == OP_META_MULT2)
+ reg_cache[reg_hi] = (s32) rs >> (32 - c.r.op);
+ else
+ reg_cache[reg_hi] = rs >> (32 - c.r.op);
+ }
+
+ return jump_next(inter);
+}
+
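A worked check of the interpreter semantics above: mult2/multu2 encode a multiplication of rs by 1 << c.r.op, with HI/LO receiving the 64-bit product, so for shifts below 32 the (lo, hi) pair equals (rs << op, rs >> (32 - op)) with an arithmetic shift in the signed case. A self-contained sketch (the shift amount is assumed to be in 1..31; the values are illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t rs = 0x80000001;
        unsigned shift = 4;                              /* c.r.op */
        int64_t product = (int64_t)(int32_t)rs << shift; /* signed mult2 */
        uint32_t lo = rs << shift;                       /* 0x00000010 */
        uint32_t hi = (uint32_t)((int32_t)rs >> (32 - shift)); /* 0xfffffff8 */

        assert(lo == (uint32_t)product);
        assert(hi == (uint32_t)((uint64_t)product >> 32));
        return 0;
    }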
static const lightrec_int_func_t int_standard[64] = {
SET_DEFAULT_ELM(int_standard, int_unimplemented),
[OP_SPECIAL] = int_SPECIAL,
[OP_META_MOV] = int_META_MOV,
[OP_META_EXTC] = int_META_EXTC,
[OP_META_EXTS] = int_META_EXTS,
+ [OP_META_MULT2] = int_META_MULT2,
+ [OP_META_MULTU2] = int_META_MULT2,
};
static const lightrec_int_func_t int_special[64] = {
pr_err("PC 0x%x is outside block at PC 0x%x\n", pc, block->pc);
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
+
return 0;
}
#include "lightrec-config.h"
#include "disassembler.h"
#include "lightrec.h"
+#include "regcache.h"
#if ENABLE_THREADED_COMPILER
#include <stdatomic.h>
#define fallthrough do {} while (0) /* fall-through */
+#define container_of(ptr, type, member) \
+ ((type *)((void *)(ptr) - offsetof(type, member)))
+
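container_of() recovers a pointer to an enclosing structure from a pointer to one of its members. In this patch it is what lets lightrec_free_opcode_list() get back to the opcode_list header from the ops[] flexible array member that the rest of the code passes around:

    struct opcode_list *list = container_of(ops, struct opcode_list, ops);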
+#ifdef _MSC_BUILD
+# define popcount32(x) __popcnt(x)
+# define ffs32(x) (31 - __lzcnt(x))
+#else
+# define popcount32(x) __builtin_popcount(x)
+# define ffs32(x) (__builtin_ffs(x) - 1)
+#endif
+
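One caveat worth noting: __builtin_ffs(x) - 1 is the index of the lowest set bit (ffs32(0x90) == 4), while the _MSC_BUILD fallback 31 - __lzcnt(x) is the index of the highest set bit. The two definitions only agree for power-of-two inputs, which appears to be the intended use here.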
/* Flags for (struct block *)->flags */
#define BLOCK_NEVER_COMPILE BIT(0)
#define BLOCK_SHOULD_RECOMPILE BIT(1)
#define BLOCK_FULLY_TAGGED BIT(2)
#define BLOCK_IS_DEAD BIT(3)
#define BLOCK_IS_MEMSET BIT(4)
+#define BLOCK_NO_OPCODE_LIST BIT(5)
#define RAM_SIZE 0x200000
#define BIOS_SIZE 0x80000
u32 precompile_date;
unsigned int code_size;
u16 nb_ops;
- u8 flags;
#if ENABLE_THREADED_COMPILER
- atomic_flag op_list_freed;
+ _Atomic u8 flags;
+#else
+ u8 flags;
#endif
};
C_WRAPPER_RW_GENERIC,
C_WRAPPER_MTC,
C_WRAPPER_CP,
- C_WRAPPER_SYSCALL,
- C_WRAPPER_BREAK,
C_WRAPPERS_COUNT,
};
struct lightrec_cstate {
struct lightrec_state *state;
- struct jit_node *branches[512];
struct lightrec_branch local_branches[512];
struct lightrec_branch_target targets[512];
- unsigned int nb_branches;
unsigned int nb_local_branches;
unsigned int nb_targets;
unsigned int cycles;
struct lightrec_state {
struct lightrec_registers regs;
+ uintptr_t wrapper_regs[NUM_TEMPS];
u32 next_pc;
u32 current_cycle;
u32 target_cycle;
unsigned int nb_precompile;
unsigned int nb_maps;
const struct lightrec_mem_map *maps;
- uintptr_t offset_ram, offset_bios, offset_scratch;
+ uintptr_t offset_ram, offset_bios, offset_scratch, offset_io;
_Bool with_32bit_lut;
_Bool mirrors_mapped;
_Bool invalidate_from_dma_only;
union code lightrec_read_opcode(struct lightrec_state *state, u32 pc);
int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block);
-void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block);
+void lightrec_free_opcode_list(struct lightrec_state *state,
+ struct opcode *list);
unsigned int lightrec_cycles_of_opcode(union code code);
return a > b ? a : b;
}
+static inline _Bool block_has_flag(struct block *block, u8 flag)
+{
+#if ENABLE_THREADED_COMPILER
+ return atomic_load_explicit(&block->flags, memory_order_relaxed) & flag;
+#else
+ return block->flags & flag;
+#endif
+}
+
+static inline u8 block_set_flags(struct block *block, u8 mask)
+{
+#if ENABLE_THREADED_COMPILER
+ return atomic_fetch_or_explicit(&block->flags, mask,
+ memory_order_relaxed);
+#else
+ u8 flags = block->flags;
+
+ block->flags |= mask;
+
+ return flags;
+#endif
+}
+
+static inline u8 block_clear_flags(struct block *block, u8 mask)
+{
+#if ENABLE_THREADED_COMPILER
+ return atomic_fetch_and_explicit(&block->flags, ~mask,
+ memory_order_relaxed);
+#else
+ u8 flags = block->flags;
+
+ block->flags &= ~mask;
+
+ return flags;
+#endif
+}
+
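Because the set/clear helpers return the previous flags, they support the atomic claim pattern used throughout this patch, where exactly one thread wins the right to act on a state transition. A fragment mirroring the call sites in lightrec.c:

    u8 old_flags = block_set_flags(block, BLOCK_IS_DEAD);
    if (!(old_flags & BLOCK_IS_DEAD)) {
        /* We were the first to mark the block dead, so we (and only
         * we) are responsible for tearing it down. */
    }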
#endif /* __LIGHTREC_PRIVATE_H__ */
struct opcode *op;
bool was_tagged;
u16 offset = (u16)arg;
+ u16 old_flags;
block = lightrec_find_block_from_lut(state->block_cache,
arg >> 16, state->next_pc);
if (unlikely(!block)) {
pr_err("rw_generic: No block found in LUT for PC 0x%x offset 0x%x\n",
state->next_pc, offset);
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT);
return;
}
lightrec_rw_helper(state, op->c, &op->flags, block);
if (!was_tagged) {
- pr_debug("Opcode of block at PC 0x%08x has been tagged - flag "
- "for recompilation\n", block->pc);
+ old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE);
- block->flags |= BLOCK_SHOULD_RECOMPILE;
- lut_write(state, lut_offset(block->pc), NULL);
+ if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) {
+ pr_debug("Opcode of block at PC 0x%08x has been tagged"
+ " - flag for recompilation\n", block->pc);
+
+ lut_write(state, lut_offset(block->pc), NULL);
+ }
}
}
u32 lightrec_mfc(struct lightrec_state *state, union code op)
{
+ u32 val;
+
if (op.i.op == OP_CP0)
return state->regs.cp0[op.r.rd];
else if (op.r.rs == OP_CP2_BASIC_MFC2)
return lightrec_mfc2(state, op.r.rd);
- else
- return state->regs.cp2c[op.r.rd];
+
+ val = state->regs.cp2c[op.r.rd];
+
+ switch (op.r.rd) {
+ case 4:
+ case 12:
+ case 20:
+ case 26:
+ case 27:
+ case 29:
+ case 30:
+ return (u32)(s16)val;
+ default:
+ return val;
+ }
}
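The registers special-cased above (4, 12, 20, 26, 27, 29, 30) are the GTE control registers that hold 16-bit values; real hardware sign-extends them on read, including H (register 26), whose sign extension is a well-known hardware quirk since the value itself is unsigned. For example, a stored 0x8000 reads back as 0xffff8000 after the (u32)(s16) cast.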
static void lightrec_mtc0(struct lightrec_state *state, u8 reg, u32 data)
lightrec_cp(state, (union code) arg);
}
-static void lightrec_syscall_cb(struct lightrec_state *state)
-{
- lightrec_set_exit_flags(state, LIGHTREC_EXIT_SYSCALL);
-}
-
-static void lightrec_break_cb(struct lightrec_state *state)
-{
- lightrec_set_exit_flags(state, LIGHTREC_EXIT_BREAK);
-}
-
static struct block * lightrec_get_block(struct lightrec_state *state, u32 pc)
{
struct block *block = lightrec_find_block(state->block_cache, pc);
+ u8 old_flags;
if (block && lightrec_block_is_outdated(state, block)) {
pr_debug("Block at PC 0x%08x is outdated!\n", block->pc);
- /* Make sure the recompiler isn't processing the block we'll
- * destroy */
- if (ENABLE_THREADED_COMPILER)
- lightrec_recompiler_remove(state->rec, block);
+ old_flags = block_set_flags(block, BLOCK_IS_DEAD);
+ if (!(old_flags & BLOCK_IS_DEAD)) {
+ /* Make sure the recompiler isn't processing the block
+ * we'll destroy */
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_recompiler_remove(state->rec, block);
+
+ lightrec_unregister_block(state->block_cache, block);
+ remove_from_code_lut(state->block_cache, block);
+ lightrec_free_block(state, block);
+ }
- lightrec_unregister_block(state->block_cache, block);
- remove_from_code_lut(state->block_cache, block);
- lightrec_free_block(state, block);
block = NULL;
}
if (unlikely(!block))
break;
- if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET)) {
+ if (OPT_REPLACE_MEMSET &&
+ block_has_flag(block, BLOCK_IS_MEMSET)) {
func = state->memset_func;
break;
}
- should_recompile = block->flags & BLOCK_SHOULD_RECOMPILE &&
- !(block->flags & BLOCK_IS_DEAD);
+ should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) &&
+ !block_has_flag(block, BLOCK_IS_DEAD);
if (unlikely(should_recompile)) {
pr_debug("Block at PC 0x%08x should recompile\n", pc);
- lightrec_unregister(MEM_FOR_CODE, block->code_size);
-
if (ENABLE_THREADED_COMPILER) {
lightrec_recompiler_add(state->rec, block);
} else {
if (likely(func))
break;
- if (unlikely(block->flags & BLOCK_NEVER_COMPILE)) {
+ if (unlikely(block_has_flag(block, BLOCK_NEVER_COMPILE))) {
pc = lightrec_emulate_block(state, block, pc);
} else if (!ENABLE_THREADED_COMPILER) {
/* Block wasn't compiled yet - run the interpreter */
- if (block->flags & BLOCK_FULLY_TAGGED)
+ if (block_has_flag(block, BLOCK_FULLY_TAGGED))
pr_debug("Block fully tagged, skipping first pass\n");
else if (ENABLE_FIRST_PASS && likely(!should_recompile))
pc = lightrec_emulate_block(state, block, pc);
state->exit_flags = LIGHTREC_EXIT_NOMEM;
return NULL;
}
+ } else if (unlikely(block_has_flag(block, BLOCK_IS_DEAD))) {
+ /*
+ * If the block is dead but has never been compiled,
+ * then its function pointer is NULL and we cannot
+ * execute the block. In that case, reap all the dead
+ * blocks now, and in the next loop we will create a
+ * new block.
+ */
+ lightrec_reaper_reap(state->reaper);
} else {
lightrec_recompiler_add(state->rec, block);
}
return func;
}
-static s32 c_function_wrapper(struct lightrec_state *state, s32 cycles_delta,
- void (*f)(struct lightrec_state *, u32), u32 arg)
-{
- state->current_cycle = state->target_cycle - cycles_delta;
-
- (*f)(state, arg);
-
- return state->target_cycle - state->current_cycle;
-}
-
static void * lightrec_alloc_code(struct lightrec_state *state, size_t size)
{
void *code;
struct block *block;
jit_state_t *_jit;
unsigned int i;
- int stack_ptr;
- jit_node_t *to_tramp, *to_fn_epilog;
jit_node_t *addr[C_WRAPPERS_COUNT - 1];
+ jit_node_t *to_end[C_WRAPPERS_COUNT - 1];
block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block));
if (!block)
jit_prolog();
jit_tramp(256);
- /* Add entry points; separate them by opcodes that increment
- * LIGHTREC_REG_STATE (since we cannot touch other registers).
- * The difference will then tell us which C function to call. */
+	/* Add entry points; each entry point loads the address of its
+	 * C wrapper into JIT_R1, then jumps to the common wrapper code
+	 * below. */
for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) {
- jit_addi(LIGHTREC_REG_STATE, LIGHTREC_REG_STATE, __WORDSIZE / 8);
+ jit_ldxi(JIT_R1, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, c_wrappers[i]));
+ to_end[i - 1] = jit_b();
addr[i - 1] = jit_indirect();
}
+ jit_ldxi(JIT_R1, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, c_wrappers[0]));
+
+ for (i = 0; i < C_WRAPPERS_COUNT - 1; i++)
+ jit_patch(to_end[i]);
+
jit_epilog();
jit_prolog();
- stack_ptr = jit_allocai(sizeof(uintptr_t) * NUM_TEMPS);
-
/* Save all temporaries on stack */
- for (i = 0; i < NUM_TEMPS; i++)
- jit_stxi(stack_ptr + i * sizeof(uintptr_t), JIT_FP, JIT_R(i));
+ for (i = 0; i < NUM_TEMPS; i++) {
+ if (i + FIRST_TEMP != 1) {
+ jit_stxi(offsetof(struct lightrec_state, wrapper_regs[i]),
+ LIGHTREC_REG_STATE, JIT_R(i + FIRST_TEMP));
+ }
+ }
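JIT_R1 is deliberately skipped by this save loop and by the matching restore loop further down: it is never mapped to a guest value (call_to_c_wrapper() allocates it explicitly to keep it free), and the entry points above use it to carry the address of the C wrapper down to the jit_finishr() call.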
- jit_getarg(JIT_R1, jit_arg());
+ jit_getarg(JIT_R2, jit_arg());
- /* Jump to the trampoline */
- to_tramp = jit_jmpi();
+ jit_prepare();
+ jit_pushargr(LIGHTREC_REG_STATE);
+ jit_pushargr(JIT_R2);
- /* The trampoline will jump back here */
- to_fn_epilog = jit_label();
+ jit_ldxi_ui(JIT_R2, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, target_cycle));
- /* Restore temporaries from stack */
- for (i = 0; i < NUM_TEMPS; i++)
- jit_ldxi(JIT_R(i), JIT_FP, stack_ptr + i * sizeof(uintptr_t));
+ /* state->current_cycle = state->target_cycle - delta; */
+ jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, LIGHTREC_REG_CYCLE);
+ jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
+ LIGHTREC_REG_STATE, LIGHTREC_REG_CYCLE);
- jit_ret();
- jit_epilog();
+ /* Call the wrapper function */
+ jit_finishr(JIT_R1);
- /* Trampoline entry point.
- * The sole purpose of the trampoline is to cheese Lightning not to
- * save/restore the callee-saved register LIGHTREC_REG_CYCLE, since we
- * do want to return to the caller with this register modified. */
- jit_prolog();
- jit_tramp(256);
- jit_patch(to_tramp);
-
- /* Retrieve the wrapper function */
- jit_ldxi(JIT_R0, LIGHTREC_REG_STATE,
- offsetof(struct lightrec_state, c_wrappers));
-
- /* Restore LIGHTREC_REG_STATE to its correct value */
- jit_movi(LIGHTREC_REG_STATE, (uintptr_t) state);
+	/* delta = state->target_cycle - state->current_cycle */
+ jit_ldxi_ui(LIGHTREC_REG_CYCLE, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, current_cycle));
+ jit_ldxi_ui(JIT_R1, LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, target_cycle));
+ jit_subr(LIGHTREC_REG_CYCLE, JIT_R1, LIGHTREC_REG_CYCLE);
- jit_prepare();
- jit_pushargr(LIGHTREC_REG_STATE);
- jit_pushargr(LIGHTREC_REG_CYCLE);
- jit_pushargr(JIT_R0);
- jit_pushargr(JIT_R1);
- jit_finishi(c_function_wrapper);
- jit_retval_i(LIGHTREC_REG_CYCLE);
+ /* Restore temporaries from stack */
+ for (i = 0; i < NUM_TEMPS; i++) {
+ if (i + FIRST_TEMP != 1) {
+ jit_ldxi(JIT_R(i + FIRST_TEMP), LIGHTREC_REG_STATE,
+ offsetof(struct lightrec_state, wrapper_regs[i]));
+ }
+ }
- jit_patch_at(jit_jmpi(), to_fn_epilog);
+ jit_ret();
jit_epilog();
block->_jit = _jit;
block->opcode_list = NULL;
- block->flags = 0;
+ block->flags = BLOCK_NO_OPCODE_LIST;
block->nb_ops = 0;
block->function = lightrec_emit_code(state, block, _jit,
jit_prolog();
jit_frame(256);
- jit_getarg(JIT_R0, jit_arg());
+ jit_getarg(JIT_V1, jit_arg());
jit_getarg_i(LIGHTREC_REG_CYCLE, jit_arg());
/* Force all callee-saved registers to be pushed on the stack */
for (i = 0; i < NUM_REGS; i++)
- jit_movr(JIT_V(i), JIT_V(i));
+ jit_movr(JIT_V(i + FIRST_REG), JIT_V(i + FIRST_REG));
/* Pass lightrec_state structure to blocks, using the last callee-saved
* register that Lightning provides */
loop = jit_label();
/* Call the block's code */
- jit_jmpr(JIT_R0);
+ jit_jmpr(JIT_V1);
if (OPT_REPLACE_MEMSET) {
/* Blocks will jump here when they need to call
* lightrec_memset() */
addr3 = jit_indirect();
+ jit_movr(JIT_V1, LIGHTREC_REG_CYCLE);
+
jit_prepare();
jit_pushargr(LIGHTREC_REG_STATE);
jit_finishi(lightrec_memset);
jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, regs.gpr[31]));
- jit_retval(JIT_R0);
- jit_subr(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, JIT_R0);
+ jit_retval(LIGHTREC_REG_CYCLE);
+ jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE);
}
/* The block will jump here, with the number of cycles remaining in
to_end = jit_blei(LIGHTREC_REG_CYCLE, 0);
/* Convert next PC to KUNSEG and avoid mirrors */
- jit_andi(JIT_R0, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
- jit_rshi_u(JIT_R1, JIT_R0, 28);
+ jit_andi(JIT_V1, JIT_V0, 0x10000000 | (RAM_SIZE - 1));
+ jit_rshi_u(JIT_R1, JIT_V1, 28);
jit_andi(JIT_R2, JIT_V0, BIOS_SIZE - 1);
jit_addi(JIT_R2, JIT_R2, RAM_SIZE);
- jit_movnr(JIT_R0, JIT_R2, JIT_R1);
+ jit_movnr(JIT_V1, JIT_R2, JIT_R1);
/* If possible, use the code LUT */
if (!lut_is_32bit(state))
- jit_lshi(JIT_R0, JIT_R0, 1);
- jit_addr(JIT_R0, JIT_R0, LIGHTREC_REG_STATE);
+ jit_lshi(JIT_V1, JIT_V1, 1);
+ jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE);
offset = offsetof(struct lightrec_state, code_lut);
if (lut_is_32bit(state))
- jit_ldxi_ui(JIT_R0, JIT_R0, offset);
+ jit_ldxi_ui(JIT_V1, JIT_V1, offset);
else
- jit_ldxi(JIT_R0, JIT_R0, offset);
+ jit_ldxi(JIT_V1, JIT_V1, offset);
/* If we get non-NULL, loop */
- jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+ jit_patch_at(jit_bnei(JIT_V1, 0), loop);
+
+ /* The code LUT will be set to this address when the block at the target
+ * PC has been preprocessed but not yet compiled by the threaded
+ * recompiler */
+ addr = jit_indirect();
/* Slow path: call C function get_next_block_func() */
/* We may call the interpreter - update state->current_cycle */
jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, target_cycle));
- jit_subr(JIT_R1, JIT_R2, LIGHTREC_REG_CYCLE);
+ jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE);
jit_stxi_i(offsetof(struct lightrec_state, current_cycle),
- LIGHTREC_REG_STATE, JIT_R1);
+ LIGHTREC_REG_STATE, JIT_V1);
}
- /* The code LUT will be set to this address when the block at the target
- * PC has been preprocessed but not yet compiled by the threaded
- * recompiler */
- addr = jit_indirect();
-
- /* Get the next block */
jit_prepare();
jit_pushargr(LIGHTREC_REG_STATE);
jit_pushargr(JIT_V0);
+
+ /* Save the cycles register if needed */
+ if (!(ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES))
+ jit_movr(JIT_V0, LIGHTREC_REG_CYCLE);
+
+ /* Get the next block */
jit_finishi(&get_next_block_func);
- jit_retval(JIT_R0);
+ jit_retval(JIT_V1);
if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) {
/* The interpreter may have updated state->current_cycle and
jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE,
offsetof(struct lightrec_state, target_cycle));
jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1);
+ } else {
+ jit_movr(LIGHTREC_REG_CYCLE, JIT_V0);
}
/* If we get non-NULL, loop */
- jit_patch_at(jit_bnei(JIT_R0, 0), loop);
+ jit_patch_at(jit_bnei(JIT_V1, 0), loop);
/* When exiting, the recompiled code will jump to that address */
jit_note(__FILE__, __LINE__);
block->_jit = _jit;
block->opcode_list = NULL;
- block->flags = 0;
+ block->flags = BLOCK_NO_OPCODE_LIST;
block->nb_ops = 0;
block->function = lightrec_emit_code(state, block, _jit,
return 2;
}
-void lightrec_free_opcode_list(struct lightrec_state *state, struct block *block)
+void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *ops)
{
+ struct opcode_list *list = container_of(ops, struct opcode_list, ops);
+
lightrec_free(state, MEM_FOR_IR,
- sizeof(*block->opcode_list) * block->nb_ops,
- block->opcode_list);
+ sizeof(*list) + list->nb_ops * sizeof(struct opcode),
+ list);
}
static unsigned int lightrec_get_mips_block_len(const u32 *src)
static struct opcode * lightrec_disassemble(struct lightrec_state *state,
const u32 *src, unsigned int *len)
{
- struct opcode *list;
+ struct opcode_list *list;
unsigned int i, length;
length = lightrec_get_mips_block_len(src);
- list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * length);
+ list = lightrec_malloc(state, MEM_FOR_IR,
+ sizeof(*list) + sizeof(struct opcode) * length);
if (!list) {
pr_err("Unable to allocate memory\n");
return NULL;
}
+ list->nb_ops = (u16) length;
+
for (i = 0; i < length; i++) {
- list[i].opcode = LE32TOH(src[i]);
- list[i].flags = 0;
+ list->ops[i].opcode = LE32TOH(src[i]);
+ list->ops[i].flags = 0;
}
*len = length * sizeof(u32);
- return list;
+ return list->ops;
}
static struct block * lightrec_precompile_block(struct lightrec_state *state,
{
struct opcode *list;
struct block *block;
- void *host;
+ void *host, *addr;
const struct lightrec_mem_map *map = lightrec_get_map(state, &host, kunseg(pc));
const u32 *code = (u32 *) host;
unsigned int length;
bool fully_tagged;
+ u8 block_flags = 0;
if (!map)
return NULL;
block->flags = 0;
block->code_size = 0;
block->precompile_date = state->current_cycle;
-#if ENABLE_THREADED_COMPILER
- block->op_list_freed = (atomic_flag)ATOMIC_FLAG_INIT;
-#endif
block->nb_ops = length / sizeof(u32);
lightrec_optimize(state, block);
/* If the first opcode is an 'impossible' branch, never compile the
* block */
if (should_emulate(block->opcode_list))
- block->flags |= BLOCK_NEVER_COMPILE;
+ block_flags |= BLOCK_NEVER_COMPILE;
fully_tagged = lightrec_block_is_fully_tagged(block);
if (fully_tagged)
- block->flags |= BLOCK_FULLY_TAGGED;
+ block_flags |= BLOCK_FULLY_TAGGED;
- if (OPT_REPLACE_MEMSET && (block->flags & BLOCK_IS_MEMSET))
- lut_write(state, lut_offset(pc), state->memset_func);
+ if (block_flags)
+ block_set_flags(block, block_flags);
block->hash = lightrec_calculate_block_hash(block);
+ if (OPT_REPLACE_MEMSET && block_has_flag(block, BLOCK_IS_MEMSET))
+ addr = state->memset_func;
+ else
+ addr = state->get_next_block;
+ lut_write(state, lut_offset(pc), addr);
+
pr_debug("Recompile count: %u\n", state->nb_precompile++);
return block;
lightrec_free_function(state, data);
}
+static void lightrec_reap_opcode_list(struct lightrec_state *state, void *data)
+{
+ lightrec_free_opcode_list(state, data);
+}
+
int lightrec_compile_block(struct lightrec_cstate *cstate,
struct block *block)
{
struct lightrec_state *state = cstate->state;
struct lightrec_branch_target *target;
- bool op_list_freed = false, fully_tagged = false;
+ bool fully_tagged = false;
struct block *block2;
struct opcode *elm;
jit_state_t *_jit, *oldjit;
jit_node_t *start_of_block;
bool skip_next = false;
void *old_fn, *new_fn;
+ size_t old_code_size;
unsigned int i, j;
+ u8 old_flags;
u32 offset;
fully_tagged = lightrec_block_is_fully_tagged(block);
if (fully_tagged)
- block->flags |= BLOCK_FULLY_TAGGED;
+ block_set_flags(block, BLOCK_FULLY_TAGGED);
_jit = jit_new_state();
if (!_jit)
oldjit = block->_jit;
old_fn = block->function;
+ old_code_size = block->code_size;
block->_jit = _jit;
lightrec_regcache_reset(cstate->reg_cache);
cstate->cycles = 0;
- cstate->nb_branches = 0;
cstate->nb_local_branches = 0;
cstate->nb_targets = 0;
cstate->cycles += lightrec_cycles_of_opcode(elm->c);
}
- for (i = 0; i < cstate->nb_branches; i++)
- jit_patch(cstate->branches[i]);
-
for (i = 0; i < cstate->nb_local_branches; i++) {
struct lightrec_branch *branch = &cstate->local_branches[i];
pr_err("Unable to find branch target\n");
}
- jit_patch_abs(jit_jmpi(), state->eob_wrapper_func);
jit_ret();
jit_epilog();
if (!ENABLE_THREADED_COMPILER)
pr_err("Unable to compile block!\n");
block->_jit = oldjit;
+ jit_clear_state();
_jit_destroy_state(_jit);
return -ENOMEM;
}
+	/* Pause the reaper, because lightrec_reset_lut_offset() may try to
+	 * write the old block->function pointer into the code LUT. */
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_reaper_pause(state->reaper);
+
block->function = new_fn;
- block->flags &= ~BLOCK_SHOULD_RECOMPILE;
+ block_clear_flags(block, BLOCK_SHOULD_RECOMPILE);
/* Add compiled function to the LUT */
lut_write(state, lut_offset(block->pc), block->function);
- if (ENABLE_THREADED_COMPILER) {
- /* Since we might try to reap the same block multiple times,
- * we need the reaper to wait until everything has been
- * submitted, so that the duplicate entries can be dropped. */
- lightrec_reaper_pause(state->reaper);
- }
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_reaper_continue(state->reaper);
/* Detect old blocks that have been covered by the new one */
for (i = 0; i < cstate->nb_targets; i++) {
continue;
offset = block->pc + target->offset * sizeof(u32);
+
+		/* Pause the reaper from the moment we search for the block
+		 * until we have set the BLOCK_IS_DEAD flag; otherwise the
+		 * block may be removed under our feet. */
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_reaper_pause(state->reaper);
+
block2 = lightrec_find_block(state->block_cache, offset);
if (block2) {
/* No need to check if block2 is compilable - it must
/* Set the "block dead" flag to prevent the dynarec from
* recompiling this block */
- block2->flags |= BLOCK_IS_DEAD;
+ old_flags = block_set_flags(block2, BLOCK_IS_DEAD);
+ }
+
+ if (ENABLE_THREADED_COMPILER) {
+ lightrec_reaper_continue(state->reaper);
/* If block2 was pending for compilation, cancel it.
* If it's being compiled right now, wait until it
* finishes. */
- if (ENABLE_THREADED_COMPILER)
+ if (block2)
lightrec_recompiler_remove(state->rec, block2);
}
"0x%08x\n", block2->pc, block->pc);
/* Finally, reap the block. */
- if (ENABLE_THREADED_COMPILER) {
+ if (!ENABLE_THREADED_COMPILER) {
+ lightrec_unregister_block(state->block_cache, block2);
+ lightrec_free_block(state, block2);
+ } else if (!(old_flags & BLOCK_IS_DEAD)) {
lightrec_reaper_add(state->reaper,
lightrec_reap_block,
block2);
- } else {
- lightrec_unregister_block(state->block_cache, block2);
- lightrec_free_block(state, block2);
}
}
}
- if (ENABLE_THREADED_COMPILER)
- lightrec_reaper_continue(state->reaper);
-
if (ENABLE_DISASSEMBLER) {
pr_debug("Compiling block at PC: 0x%08x\n", block->pc);
jit_disassemble();
jit_clear_state();
-#if ENABLE_THREADED_COMPILER
if (fully_tagged)
- op_list_freed = atomic_flag_test_and_set(&block->op_list_freed);
-#endif
- if (fully_tagged && !op_list_freed) {
+ old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
+
+ if (fully_tagged && !(old_flags & BLOCK_NO_OPCODE_LIST)) {
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
- lightrec_free_opcode_list(state, block);
- block->opcode_list = NULL;
+
+ if (ENABLE_THREADED_COMPILER) {
+ lightrec_reaper_add(state->reaper,
+ lightrec_reap_opcode_list,
+ block->opcode_list);
+ } else {
+ lightrec_free_opcode_list(state, block->opcode_list);
+ }
}
if (oldjit) {
_jit_destroy_state(oldjit);
lightrec_free_function(state, old_fn);
}
+
+ lightrec_unregister(MEM_FOR_CODE, old_code_size);
}
return 0;
return state->next_pc;
}
-u32 lightrec_execute_one(struct lightrec_state *state, u32 pc)
+u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc,
+ u32 target_cycle)
{
- return lightrec_execute(state, pc, state->current_cycle);
-}
-
-u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc)
-{
- struct block *block = lightrec_get_block(state, pc);
- if (!block)
- return 0;
+ struct block *block;
state->exit_flags = LIGHTREC_EXIT_NORMAL;
+ state->target_cycle = target_cycle;
+
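+	/* Emulate one block at a time until the target cycle count is
+	 * reached, letting the reaper run between blocks when the threaded
+	 * compiler is enabled. */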
+ do {
+ block = lightrec_get_block(state, pc);
+ if (!block)
+ break;
+
+ pc = lightrec_emulate_block(state, block, pc);
- pc = lightrec_emulate_block(state, block, pc);
+ if (ENABLE_THREADED_COMPILER)
+ lightrec_reaper_reap(state->reaper);
+ } while (state->current_cycle < state->target_cycle);
if (LOG_LEVEL >= INFO_L)
lightrec_print_info(state);
void lightrec_free_block(struct lightrec_state *state, struct block *block)
{
+ u8 old_flags;
+
lightrec_unregister(MEM_FOR_MIPS_CODE, block->nb_ops * sizeof(u32));
- if (block->opcode_list)
- lightrec_free_opcode_list(state, block);
+ old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
+
+ if (!(old_flags & BLOCK_NO_OPCODE_LIST))
+ lightrec_free_opcode_list(state, block->opcode_list);
if (block->_jit)
_jit_destroy_state(block->_jit);
if (block->function) {
state->c_wrappers[C_WRAPPER_RW_GENERIC] = lightrec_rw_generic_cb;
state->c_wrappers[C_WRAPPER_MTC] = lightrec_mtc_cb;
state->c_wrappers[C_WRAPPER_CP] = lightrec_cp_cb;
- state->c_wrappers[C_WRAPPER_SYSCALL] = lightrec_syscall_cb;
- state->c_wrappers[C_WRAPPER_BREAK] = lightrec_break_cb;
map = &state->maps[PSX_MAP_BIOS];
state->offset_bios = (uintptr_t)map->address - map->pc;
map = &state->maps[PSX_MAP_SCRATCH_PAD];
state->offset_scratch = (uintptr_t)map->address - map->pc;
+ map = &state->maps[PSX_MAP_HW_REGISTERS];
+ state->offset_io = (uintptr_t)map->address - map->pc;
+
map = &state->maps[PSX_MAP_KERNEL_USER_RAM];
state->offset_ram = (uintptr_t)map->address - map->pc;
if (state->offset_bios == 0 &&
state->offset_scratch == 0 &&
state->offset_ram == 0 &&
+ state->offset_io == 0 &&
state->mirrors_mapped) {
pr_info("Memory map is perfect. Emitted code will be best.\n");
} else {
struct lightrec_ops {
void (*cop2_op)(struct lightrec_state *state, u32 op);
void (*enable_ram)(struct lightrec_state *state, _Bool enable);
+ _Bool (*hw_direct)(u32 kaddr, _Bool is_write, u8 size);
};
struct lightrec_registers {
__api u32 lightrec_execute(struct lightrec_state *state,
u32 pc, u32 target_cycle);
-__api u32 lightrec_execute_one(struct lightrec_state *state, u32 pc);
-__api u32 lightrec_run_interpreter(struct lightrec_state *state, u32 pc);
+__api u32 lightrec_run_interpreter(struct lightrec_state *state,
+ u32 pc, u32 target_cycle);
__api void lightrec_invalidate(struct lightrec_state *state, u32 addr, u32 len);
__api void lightrec_invalidate_all(struct lightrec_state *state);
}
}
-static u64 opcode_write_mask(union code op)
+static u64 mult_div_write_mask(union code op)
{
u64 flags;
+ if (!OPT_FLAG_MULT_DIV)
+ return BIT(REG_LO) | BIT(REG_HI);
+
+ if (op.r.rd)
+ flags = BIT(op.r.rd);
+ else
+ flags = BIT(REG_LO);
+ if (op.r.imm)
+ flags |= BIT(op.r.imm);
+ else
+ flags |= BIT(REG_HI);
+
+ return flags;
+}
+
+static u64 opcode_write_mask(union code op)
+{
switch (op.i.op) {
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
+ return mult_div_write_mask(op);
case OP_SPECIAL:
switch (op.r.op) {
case OP_SPECIAL_JR:
case OP_SPECIAL_MULTU:
case OP_SPECIAL_DIV:
case OP_SPECIAL_DIVU:
- if (!OPT_FLAG_MULT_DIV)
- return BIT(REG_LO) | BIT(REG_HI);
-
- if (op.r.rd)
- flags = BIT(op.r.rd);
- else
- flags = BIT(REG_LO);
- if (op.r.imm)
- flags |= BIT(op.r.imm);
- else
- flags |= BIT(REG_HI);
- return flags;
+ return mult_div_write_mask(op);
case OP_SPECIAL_MTHI:
return BIT(REG_HI);
case OP_SPECIAL_MTLO:
}
}
+static u8 opcode_get_io_size(union code op)
+{
+ switch (op.i.op) {
+ case OP_LB:
+ case OP_LBU:
+ case OP_SB:
+ return 8;
+ case OP_LH:
+ case OP_LHU:
+ case OP_SH:
+ return 16;
+ default:
+ return 32;
+ }
+}
+
bool opcode_is_io(union code op)
{
return opcode_is_load(op) || opcode_is_store(op);
known &= ~BIT(c.r.rd);
}
break;
+ case OP_SPECIAL_MULT:
+ case OP_SPECIAL_MULTU:
+ case OP_SPECIAL_DIV:
+ case OP_SPECIAL_DIVU:
+ if (OPT_FLAG_MULT_DIV && c.r.rd)
+ known &= ~BIT(c.r.rd);
+ if (OPT_FLAG_MULT_DIV && c.r.imm)
+ known &= ~BIT(c.r.imm);
+ break;
default:
break;
}
break;
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
+ if (OPT_FLAG_MULT_DIV && (known & BIT(c.r.rs))) {
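+			/* For the meta multiply-by-power-of-two opcodes,
+			 * c.r.op holds the shift amount, c.r.rd the LO
+			 * destination and c.r.imm the HI destination; a
+			 * shift of 32 or more moves everything into HI. */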
+ if (c.r.rd) {
+ known |= BIT(c.r.rd);
+
+ if (c.r.op < 32)
+ v[c.r.rd] = v[c.r.rs] << c.r.op;
+ else
+ v[c.r.rd] = 0;
+ }
+
+ if (c.r.imm) {
+ known |= BIT(c.r.imm);
+
+ if (c.r.op >= 32)
+ v[c.r.imm] = v[c.r.rs] << (c.r.op - 32);
+ else if (c.i.op == OP_META_MULT2)
+ v[c.r.imm] = (s32) v[c.r.rs] >> (32 - c.r.op);
+ else
+ v[c.r.imm] = v[c.r.rs] >> (32 - c.r.op);
+ }
+ } else {
+ if (OPT_FLAG_MULT_DIV && c.r.rd)
+ known &= ~BIT(c.r.rd);
+ if (OPT_FLAG_MULT_DIV && c.r.imm)
+ known &= ~BIT(c.r.imm);
+ }
+ break;
case OP_REGIMM:
break;
case OP_ADDI:
op->i.imm = offset;
}
- default: /* fall-through */
+ fallthrough;
+ default:
break;
}
}
return 0;
}
+static inline bool is_power_of_two(u32 value)
+{
+ return popcount32(value) == 1;
+}
+
static int lightrec_transform_ops(struct lightrec_state *state, struct block *block)
{
struct opcode *list = block->opcode_list;
u32 known = BIT(0);
u32 values[32] = { 0 };
unsigned int i;
+ u8 tmp;
for (i = 0; i < block->nb_ops; i++) {
prev = op;
op->r.rs = op->r.rt;
}
break;
+ case OP_SPECIAL_MULT:
+ case OP_SPECIAL_MULTU:
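+			/* If the constant power-of-two operand is in rs,
+			 * swap the operands so that the constant always ends
+			 * up in rt. */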
+ if ((known & BIT(op->r.rs)) &&
+ is_power_of_two(values[op->r.rs])) {
+ tmp = op->c.i.rs;
+ op->c.i.rs = op->c.i.rt;
+ op->c.i.rt = tmp;
+ } else if (!(known & BIT(op->r.rt)) ||
+ !is_power_of_two(values[op->r.rt])) {
+ break;
+ }
+
+ pr_debug("Multiply by power-of-two: %u\n",
+ values[op->r.rt]);
+
+ if (op->r.op == OP_SPECIAL_MULT)
+ op->i.op = OP_META_MULT2;
+ else
+ op->i.op = OP_META_MULTU2;
+
+ op->r.op = ffs32(values[op->r.rt]);
+ break;
case OP_SPECIAL_OR:
case OP_SPECIAL_ADD:
case OP_SPECIAL_ADDU:
return 0;
}
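+/* A branch and its delay slot opcode can be swapped if the delay slot
+ * opcode does not write a register the branch reads, and neither reads
+ * nor writes the link register of a linking branch. */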
+static bool lightrec_can_switch_delay_slot(union code op, union code next_op)
+{
+ switch (op.i.op) {
+ case OP_SPECIAL:
+ switch (op.r.op) {
+ case OP_SPECIAL_JALR:
+ if (opcode_reads_register(next_op, op.r.rd) ||
+ opcode_writes_register(next_op, op.r.rd))
+ return false;
+ fallthrough;
+ case OP_SPECIAL_JR:
+ if (opcode_writes_register(next_op, op.r.rs))
+ return false;
+ fallthrough;
+ default:
+ break;
+ }
+ fallthrough;
+ case OP_J:
+ break;
+ case OP_JAL:
+ if (opcode_reads_register(next_op, 31) ||
+ opcode_writes_register(next_op, 31))
+			return false;
+
+ break;
+ case OP_BEQ:
+ case OP_BNE:
+ if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
+ return false;
+ fallthrough;
+ case OP_BLEZ:
+ case OP_BGTZ:
+ if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
+ return false;
+ break;
+ case OP_REGIMM:
+ switch (op.r.rt) {
+ case OP_REGIMM_BLTZAL:
+ case OP_REGIMM_BGEZAL:
+ if (opcode_reads_register(next_op, 31) ||
+ opcode_writes_register(next_op, 31))
+ return false;
+ fallthrough;
+ case OP_REGIMM_BLTZ:
+ case OP_REGIMM_BGEZ:
+ if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
+ return false;
+ break;
+ }
+ fallthrough;
+ default:
+ break;
+ }
+
+ return true;
+}
+
static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block)
{
struct opcode *list, *next = &block->opcode_list[0];
!op_flag_no_ds(block->opcode_list[i - 1].flags))
continue;
- if (op_flag_sync(list->flags) || op_flag_sync(next->flags))
+ if (op_flag_sync(next->flags))
continue;
- switch (list->i.op) {
- case OP_SPECIAL:
- switch (op.r.op) {
- case OP_SPECIAL_JALR:
- if (opcode_reads_register(next_op, op.r.rd) ||
- opcode_writes_register(next_op, op.r.rd))
- continue;
- fallthrough;
- case OP_SPECIAL_JR:
- if (opcode_writes_register(next_op, op.r.rs))
- continue;
- fallthrough;
- default:
- break;
- }
- fallthrough;
- case OP_J:
- break;
- case OP_JAL:
- if (opcode_reads_register(next_op, 31) ||
- opcode_writes_register(next_op, 31))
- continue;
- else
- break;
- case OP_BEQ:
- case OP_BNE:
- if (op.i.rt && opcode_writes_register(next_op, op.i.rt))
- continue;
- fallthrough;
- case OP_BLEZ:
- case OP_BGTZ:
- if (op.i.rs && opcode_writes_register(next_op, op.i.rs))
- continue;
- break;
- case OP_REGIMM:
- switch (op.r.rt) {
- case OP_REGIMM_BLTZAL:
- case OP_REGIMM_BGEZAL:
- if (opcode_reads_register(next_op, 31) ||
- opcode_writes_register(next_op, 31))
- continue;
- fallthrough;
- case OP_REGIMM_BLTZ:
- case OP_REGIMM_BGEZ:
- if (op.i.rs &&
- opcode_writes_register(next_op, op.i.rs))
- continue;
- break;
- }
- fallthrough;
- default:
- break;
- }
+ if (!lightrec_can_switch_delay_slot(list->c, next_op))
+ continue;
pr_debug("Swap branch and delay slot opcodes "
"at offsets 0x%x / 0x%x\n",
i << 2, (i + 1) << 2);
- flags = next->flags;
+ flags = next->flags | (list->flags & LIGHTREC_SYNC);
list->c = next_op;
next->c = op;
- next->flags = list->flags | LIGHTREC_NO_DS;
+ next->flags = (list->flags | LIGHTREC_NO_DS) & ~LIGHTREC_SYNC;
list->flags = flags | LIGHTREC_NO_DS;
}
static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size)
{
- struct opcode *list;
+ struct opcode_list *list, *old_list;
if (new_size >= block->nb_ops) {
pr_err("Invalid shrink size (%u vs %u)\n",
return -EINVAL;
}
-
list = lightrec_malloc(state, MEM_FOR_IR,
- sizeof(*list) * new_size);
+ sizeof(*list) + sizeof(struct opcode) * new_size);
if (!list) {
pr_err("Unable to allocate memory\n");
return -ENOMEM;
}
- memcpy(list, block->opcode_list, sizeof(*list) * new_size);
+ old_list = container_of(block->opcode_list, struct opcode_list, ops);
+ memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size);
- lightrec_free_opcode_list(state, block);
- block->opcode_list = list;
+ lightrec_free_opcode_list(state, block->opcode_list);
+ list->nb_ops = new_size;
block->nb_ops = new_size;
+ block->opcode_list = list->ops;
pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n",
block->pc, new_size);
u32 values[32] = { 0 };
unsigned int i;
u32 val, kunseg_val;
+ bool no_mask;
for (i = 0; i < block->nb_ops; i++) {
prev = list;
kunseg(values[list->i.rs]) < (kunseg(block->pc) +
block->nb_ops * 4)) {
pr_debug("Self-modifying block detected\n");
- block->flags |= BLOCK_NEVER_COMPILE;
+ block_set_flags(block, BLOCK_NEVER_COMPILE);
list->flags |= LIGHTREC_SMC;
}
}
psx_map = lightrec_get_map_idx(state, kunseg_val);
list->flags &= ~LIGHTREC_IO_MASK;
+ no_mask = val == kunseg_val;
switch (psx_map) {
case PSX_MAP_KERNEL_USER_RAM:
- if (val == kunseg_val)
+ if (no_mask)
list->flags |= LIGHTREC_NO_MASK;
fallthrough;
case PSX_MAP_MIRROR1:
case PSX_MAP_MIRROR3:
pr_debug("Flaging opcode %u as RAM access\n", i);
list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_RAM);
+ if (no_mask && state->mirrors_mapped)
+ list->flags |= LIGHTREC_NO_MASK;
break;
case PSX_MAP_BIOS:
pr_debug("Flaging opcode %u as BIOS access\n", i);
list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_BIOS);
+ if (no_mask)
+ list->flags |= LIGHTREC_NO_MASK;
break;
case PSX_MAP_SCRATCH_PAD:
pr_debug("Flaging opcode %u as scratchpad access\n", i);
list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_SCRATCH);
+ if (no_mask)
+ list->flags |= LIGHTREC_NO_MASK;
/* Consider that we're never going to run code from
* the scratchpad. */
list->flags |= LIGHTREC_NO_INVALIDATE;
break;
+ case PSX_MAP_HW_REGISTERS:
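+			/* Ask the frontend whether this hardware register
+			 * access can be performed directly, without going
+			 * through the generic I/O callback. */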
+ if (state->ops.hw_direct &&
+ state->ops.hw_direct(kunseg_val,
+ opcode_is_store(list->c),
+ opcode_get_io_size(list->c))) {
+ pr_debug("Flagging opcode %u as direct I/O access\n",
+ i);
+ list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT_HW);
+ break;
+ }
+ fallthrough;
default:
pr_debug("Flagging opcode %u as I/O access\n",
i);
}
return mflo ? REG_LO : REG_HI;
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
+ return 0;
case OP_SPECIAL:
switch (op->r.op) {
case OP_SPECIAL_MULT:
if (prev)
known = lightrec_propagate_consts(list, prev, known, values);
- if (list->i.op != OP_SPECIAL)
- continue;
-
- switch (list->r.op) {
- case OP_SPECIAL_DIV:
- case OP_SPECIAL_DIVU:
- /* If we are dividing by a non-zero constant, don't
- * emit the div-by-zero check. */
- if (lightrec_always_skip_div_check() ||
- (known & BIT(list->c.r.rt) && values[list->c.r.rt]))
- list->flags |= LIGHTREC_NO_DIV_CHECK;
+ switch (list->i.op) {
+ case OP_SPECIAL:
+ switch (list->r.op) {
+ case OP_SPECIAL_DIV:
+ case OP_SPECIAL_DIVU:
+ /* If we are dividing by a non-zero constant, don't
+ * emit the div-by-zero check. */
+ if (lightrec_always_skip_div_check() ||
+ ((known & BIT(list->c.r.rt)) && values[list->c.r.rt]))
+ list->flags |= LIGHTREC_NO_DIV_CHECK;
+ fallthrough;
+ case OP_SPECIAL_MULT:
+ case OP_SPECIAL_MULTU:
+ break;
+ default:
+ continue;
+ }
fallthrough;
- case OP_SPECIAL_MULT:
- case OP_SPECIAL_MULTU:
+ case OP_META_MULT2:
+ case OP_META_MULTU2:
break;
default:
continue;
if (i == ARRAY_SIZE(memset_code) - 1) {
/* success! */
pr_debug("Block at PC 0x%x is a memset\n", block->pc);
- block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE;
+ block_set_flags(block,
+ BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE);
/* Return non-zero to skip other optimizers. */
return 1;
struct reaper {
struct lightrec_state *state;
pthread_mutex_t mutex;
+ pthread_cond_t cond;
struct slist_elm reap_list;
+ bool running;
atomic_uint sem;
};
}
reaper->state = state;
+ reaper->running = false;
reaper->sem = 0;
slist_init(&reaper->reap_list);
ret = pthread_mutex_init(&reaper->mutex, NULL);
if (ret) {
pr_err("Cannot init mutex variable: %d\n", ret);
- lightrec_free(reaper->state, MEM_FOR_LIGHTREC,
- sizeof(*reaper), reaper);
- return NULL;
+ goto err_free_reaper;
+ }
+
+ ret = pthread_cond_init(&reaper->cond, NULL);
+ if (ret) {
+ pr_err("Cannot init cond variable: %d\n", ret);
+ goto err_destroy_mutex;
}
return reaper;
+
+err_destroy_mutex:
+ pthread_mutex_destroy(&reaper->mutex);
+err_free_reaper:
+ lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper);
+ return NULL;
}
void lightrec_reaper_destroy(struct reaper *reaper)
{
+ lightrec_reaper_reap(reaper);
+
+ pthread_cond_destroy(&reaper->cond);
pthread_mutex_destroy(&reaper->mutex);
lightrec_free(reaper->state, MEM_FOR_LIGHTREC, sizeof(*reaper), reaper);
}
while (lightrec_reaper_can_reap(reaper) &&
!!(elm = slist_first(&reaper->reap_list))) {
slist_remove(&reaper->reap_list, elm);
+ reaper->running = true;
pthread_mutex_unlock(&reaper->mutex);
reaper_elm = container_of(elm, struct reaper_elm, slist);
sizeof(*reaper_elm), reaper_elm);
pthread_mutex_lock(&reaper->mutex);
+ reaper->running = false;
+ pthread_cond_broadcast(&reaper->cond);
}
pthread_mutex_unlock(&reaper->mutex);
void lightrec_reaper_pause(struct reaper *reaper)
{
atomic_fetch_add_explicit(&reaper->sem, 1, memory_order_relaxed);
+
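+	/* Also wait until any callback the reaper is currently running has
+	 * finished, so that callers never race with an in-flight reap. */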
+ pthread_mutex_lock(&reaper->mutex);
+ while (reaper->running)
+ pthread_cond_wait(&reaper->cond, &reaper->mutex);
+ pthread_mutex_unlock(&reaper->mutex);
}
void lightrec_reaper_continue(struct reaper *reaper)
static void lightrec_cancel_list(struct recompiler *rec)
{
struct block_rec *block_rec;
- struct slist_elm *next;
-
- while (!!(next = lightrec_get_first_elm(&rec->slist))) {
- block_rec = container_of(next, struct block_rec, slist);
+ struct slist_elm *elm, *head = &rec->slist;
+ for (elm = slist_first(head); elm; elm = slist_first(head)) {
+ block_rec = container_of(elm, struct block_rec, slist);
lightrec_cancel_block_rec(rec, block_rec);
}
-
- pthread_cond_broadcast(&rec->cond2);
}
static void lightrec_flush_code_buffer(struct lightrec_state *state, void *d)
{
struct recompiler *rec = d;
- pthread_mutex_lock(&rec->mutex);
-
- if (rec->must_flush) {
- lightrec_remove_outdated_blocks(state->block_cache, NULL);
- rec->must_flush = false;
- }
-
- pthread_mutex_unlock(&rec->mutex);
+ lightrec_remove_outdated_blocks(state->block_cache, NULL);
+ rec->must_flush = false;
}
static void lightrec_compile_list(struct recompiler *rec,
pthread_mutex_unlock(&rec->mutex);
- if (likely(!(block->flags & BLOCK_IS_DEAD))) {
+ if (likely(!block_has_flag(block, BLOCK_IS_DEAD))) {
ret = lightrec_compile_block(thd->cstate, block);
if (ret == -ENOMEM) {
/* Code buffer is full. Request the reaper to
* flush it. */
pthread_mutex_lock(&rec->mutex);
+ block_rec->compiling = false;
+ pthread_cond_broadcast(&rec->cond2);
+
if (!rec->must_flush) {
+ rec->must_flush = true;
+ lightrec_cancel_list(rec);
+
lightrec_reaper_add(rec->state->reaper,
lightrec_flush_code_buffer,
rec);
- lightrec_cancel_list(rec);
- rec->must_flush = true;
}
return;
}
slist_remove(&rec->slist, next);
lightrec_free(rec->state, MEM_FOR_LIGHTREC,
sizeof(*block_rec), block_rec);
- pthread_cond_signal(&rec->cond2);
+ pthread_cond_broadcast(&rec->cond2);
}
}
/* If the block is marked as dead, don't compile it, it will be removed
* as soon as it's safe. */
- if (block->flags & BLOCK_IS_DEAD)
+ if (block_has_flag(block, BLOCK_IS_DEAD))
goto out_unlock;
for (elm = slist_first(&rec->slist), prev = NULL; elm;
* it to the top of the list, unless the block is being
* recompiled. */
if (prev && !block_rec->compiling &&
- !(block->flags & BLOCK_SHOULD_RECOMPILE)) {
+ !block_has_flag(block, BLOCK_SHOULD_RECOMPILE)) {
slist_remove_next(prev);
slist_append(&rec->slist, elm);
}
	/* By the time this function is called, the block has been recompiled
	 * and isn't in the wait list anymore. Just return here. */
- if (block->function && !(block->flags & BLOCK_SHOULD_RECOMPILE))
+ if (block->function && !block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
goto out_unlock;
block_rec = lightrec_malloc(rec->state, MEM_FOR_LIGHTREC,
/* If the block is being recompiled, push it to the end of the queue;
* otherwise push it to the front of the queue. */
- if (block->flags & BLOCK_SHOULD_RECOMPILE)
+ if (block_has_flag(block, BLOCK_SHOULD_RECOMPILE))
for (; elm->next; elm = elm->next);
slist_append(elm, &block_rec->slist);
void * lightrec_recompiler_run_first_pass(struct lightrec_state *state,
struct block *block, u32 *pc)
{
- bool freed;
+ u8 old_flags;
/* There's no point in running the first pass if the block will never
* be compiled. Let the main loop run the interpreter instead. */
- if (block->flags & BLOCK_NEVER_COMPILE)
+ if (block_has_flag(block, BLOCK_NEVER_COMPILE))
return NULL;
+ /* The block is marked as dead, and will be removed the next time the
+ * reaper is run. In the meantime, the old function can still be
+ * executed. */
+ if (block_has_flag(block, BLOCK_IS_DEAD))
+ return block->function;
+
/* If the block is already fully tagged, there is no point in running
* the first pass. Request a recompilation of the block, and maybe the
* interpreter will run the block in the meantime. */
- if (block->flags & BLOCK_FULLY_TAGGED)
+ if (block_has_flag(block, BLOCK_FULLY_TAGGED))
lightrec_recompiler_add(state->rec, block);
if (likely(block->function)) {
- if (block->flags & BLOCK_FULLY_TAGGED) {
- freed = atomic_flag_test_and_set(&block->op_list_freed);
+ if (block_has_flag(block, BLOCK_FULLY_TAGGED)) {
+ old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
- if (!freed) {
+ if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
pr_debug("Block PC 0x%08x is fully tagged"
" - free opcode list\n", block->pc);
/* The block was already compiled but the opcode list
* didn't get freed yet - do it now */
- lightrec_free_opcode_list(state, block);
- block->opcode_list = NULL;
+ lightrec_free_opcode_list(state, block->opcode_list);
}
}
/* Mark the opcode list as freed, so that the threaded compiler won't
* free it while we're using it in the interpreter. */
- freed = atomic_flag_test_and_set(&block->op_list_freed);
+ old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
/* Block wasn't compiled yet - run the interpreter */
*pc = lightrec_emulate_block(state, block, *pc);
- if (!freed)
- atomic_flag_clear(&block->op_list_freed);
+ if (!(old_flags & BLOCK_NO_OPCODE_LIST))
+ block_clear_flags(block, BLOCK_NO_OPCODE_LIST);
/* The block got compiled while the interpreter was running.
* We can free the opcode list now. */
- if (block->function && (block->flags & BLOCK_FULLY_TAGGED) &&
- !atomic_flag_test_and_set(&block->op_list_freed)) {
- pr_debug("Block PC 0x%08x is fully tagged"
- " - free opcode list\n", block->pc);
+ if (block->function && block_has_flag(block, BLOCK_FULLY_TAGGED)) {
+ old_flags = block_set_flags(block, BLOCK_NO_OPCODE_LIST);
- lightrec_free_opcode_list(state, block);
- block->opcode_list = NULL;
+ if (!(old_flags & BLOCK_NO_OPCODE_LIST)) {
+ pr_debug("Block PC 0x%08x is fully tagged"
+ " - free opcode list\n", block->pc);
+
+ lightrec_free_opcode_list(state, block->opcode_list);
+ }
}
return NULL;
#include <stdbool.h>
#include <stddef.h>
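+/* Mapping priority of a native register, from cheapest to evict (a plain
+ * temporary) to most expensive (a dirty register, which must be stored
+ * back to memory before it can be reused). */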
+enum reg_priority {
+ REG_IS_TEMP,
+ REG_IS_TEMP_VALUE,
+ REG_IS_ZERO,
+ REG_IS_LOADED,
+ REG_IS_DIRTY,
+
+ REG_NB_PRIORITIES,
+};
+
struct native_register {
- bool used, loaded, dirty, output, extend, extended,
+ bool used, output, extend, extended,
zero_extend, zero_extended, locked;
s8 emulated_register;
+ intptr_t value;
+ enum reg_priority prio;
};
struct regcache {
const struct native_register *nreg)
{
u8 offset = lightrec_reg_number(cache, nreg);
- return offset < NUM_REGS ? JIT_V(offset) : JIT_R(offset - NUM_REGS);
+
+ if (offset < NUM_REGS)
+ return JIT_V(FIRST_REG + offset);
+ else
+ return JIT_R(FIRST_TEMP + offset - NUM_REGS);
}
static inline struct native_register * lightning_reg_to_lightrec(
if ((JIT_V0 > JIT_R0 && reg >= JIT_V0) ||
(JIT_V0 < JIT_R0 && reg < JIT_R0)) {
if (JIT_V1 > JIT_V0)
- return &cache->lightrec_regs[reg - JIT_V0];
+ return &cache->lightrec_regs[reg - JIT_V(FIRST_REG)];
else
- return &cache->lightrec_regs[JIT_V0 - reg];
+ return &cache->lightrec_regs[JIT_V(FIRST_REG) - reg];
} else {
if (JIT_R1 > JIT_R0)
- return &cache->lightrec_regs[NUM_REGS + reg - JIT_R0];
+ return &cache->lightrec_regs[NUM_REGS + reg - JIT_R(FIRST_TEMP)];
else
- return &cache->lightrec_regs[NUM_REGS + JIT_R0 - reg];
+ return &cache->lightrec_regs[NUM_REGS + JIT_R(FIRST_TEMP) - reg];
}
}
static struct native_register * alloc_temp(struct regcache *cache)
{
+ struct native_register *elm, *nreg = NULL;
+ enum reg_priority best = REG_NB_PRIORITIES;
unsigned int i;
	/* We search the register list in reverse order, as temporaries are
	 * caller-saved registers and therefore won't have to be saved back
	 * to memory. Among the free registers, pick the one with the lowest
	 * priority, as it is the cheapest one to evict. */
for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) {
- struct native_register *nreg = &cache->lightrec_regs[i - 1];
- if (!nreg->used && !nreg->loaded && !nreg->dirty)
- return nreg;
- }
+ elm = &cache->lightrec_regs[i - 1];
- for (i = ARRAY_SIZE(cache->lightrec_regs); i; i--) {
- struct native_register *nreg = &cache->lightrec_regs[i - 1];
- if (!nreg->used)
- return nreg;
+ if (!elm->used && elm->prio < best) {
+ nreg = elm;
+ best = elm->prio;
+
+ if (best == REG_IS_TEMP)
+ break;
+ }
}
- return NULL;
+ return nreg;
}
static struct native_register * find_mapped_reg(struct regcache *cache,
for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
struct native_register *nreg = &cache->lightrec_regs[i];
- if ((!reg || nreg->loaded || nreg->dirty) &&
- nreg->emulated_register == reg &&
- (!out || !nreg->locked))
+ if ((nreg->prio >= REG_IS_ZERO) &&
+ nreg->emulated_register == reg &&
+ (!out || !nreg->locked))
return nreg;
}
static struct native_register * alloc_in_out(struct regcache *cache,
u8 reg, bool out)
{
- struct native_register *nreg;
+ struct native_register *elm, *nreg = NULL;
+ enum reg_priority best = REG_NB_PRIORITIES;
unsigned int i;
/* Try to find if the register is already mapped somewhere */
if (nreg)
return nreg;
- /* Try to allocate a non-dirty, non-loaded register.
- * Loaded registers may be re-used later, so it's better to avoid
- * re-using one if possible. */
- for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
- nreg = &cache->lightrec_regs[i];
- if (!nreg->used && !nreg->dirty && !nreg->loaded)
- return nreg;
- }
+ nreg = NULL;
- /* Try to allocate a non-dirty register */
for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
- nreg = &cache->lightrec_regs[i];
- if (!nreg->used && !nreg->dirty)
- return nreg;
- }
+ elm = &cache->lightrec_regs[i];
- for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
- nreg = &cache->lightrec_regs[i];
- if (!nreg->used)
- return nreg;
+ if (!elm->used && elm->prio < best) {
+ nreg = elm;
+ best = elm->prio;
+
+ if (best == REG_IS_TEMP)
+ break;
+ }
}
- return NULL;
+ return nreg;
}
static void lightrec_discard_nreg(struct native_register *nreg)
{
nreg->extended = false;
nreg->zero_extended = false;
- nreg->loaded = false;
nreg->output = false;
- nreg->dirty = false;
nreg->used = false;
nreg->locked = false;
nreg->emulated_register = -1;
+	nreg->prio = REG_IS_TEMP;
}
static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit,
struct native_register *nreg, u8 jit_reg)
{
/* If we get a dirty register, store back the old value */
- if (nreg->dirty) {
+ if (nreg->prio == REG_IS_DIRTY) {
s16 offset = offsetof(struct lightrec_state, regs.gpr)
+ (nreg->emulated_register << 2);
lightrec_unload_nreg(cache, _jit, reg, jit_reg);
reg->used = true;
+ reg->prio = REG_IS_LOADED;
return jit_reg;
}
jit_reg = lightrec_reg_to_lightning(cache, nreg);
lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
+ nreg->prio = REG_IS_TEMP;
nreg->used = true;
return jit_reg;
}
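+/* Find a temporary register already known to hold the given constant value,
+ * so the caller can reuse it instead of loading the constant again.
+ * Returns -1 if no such register is mapped. */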
+s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value)
+{
+ struct native_register *nreg;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->lightrec_regs); i++) {
+ nreg = &cache->lightrec_regs[i];
+
+ if (nreg->prio == REG_IS_TEMP_VALUE && nreg->value == value) {
+ nreg->used = true;
+ return lightrec_reg_to_lightning(cache, nreg);
+ }
+ }
+
+ return -1;
+}
+
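+/* Record that the given temporary now holds a known constant value, making
+ * it a candidate for reuse through lightrec_get_reg_with_value(). */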
+void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value)
+{
+ struct native_register *nreg;
+
+ nreg = lightning_reg_to_lightrec(cache, jit_reg);
+
+ nreg->prio = REG_IS_TEMP_VALUE;
+ nreg->value = value;
+}
+
u8 lightrec_alloc_reg_out(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 flags)
{
nreg->emulated_register = reg;
nreg->extend = flags & REG_EXT;
nreg->zero_extend = flags & REG_ZEXT;
+ nreg->prio = reg ? REG_IS_LOADED : REG_IS_ZERO;
return jit_reg;
}
if (reg_changed)
lightrec_unload_nreg(cache, _jit, nreg, jit_reg);
- if (!nreg->loaded && !nreg->dirty && reg != 0) {
+ if (nreg->prio < REG_IS_LOADED && reg != 0) {
s16 offset = offsetof(struct lightrec_state, regs.gpr)
+ (reg << 2);
else
jit_ldxi_i(jit_reg, LIGHTREC_REG_STATE, offset);
- nreg->loaded = true;
+ nreg->prio = REG_IS_LOADED;
}
/* Clear register r0 before use */
- if (reg == 0 && (!nreg->loaded || nreg->dirty)) {
+ if (reg == 0 && nreg->prio != REG_IS_ZERO) {
jit_movi(jit_reg, 0);
nreg->extended = true;
nreg->zero_extended = true;
- nreg->loaded = true;
+ nreg->prio = REG_IS_ZERO;
}
nreg->used = true;
nreg->extended = true;
nreg->zero_extended = false;
nreg->used = true;
- nreg->loaded = true;
nreg->emulated_register = reg;
+ nreg->prio = REG_IS_LOADED;
return jit_reg;
}
{
/* Set output registers as dirty */
if (nreg->used && nreg->output && nreg->emulated_register > 0)
- nreg->dirty = true;
+ nreg->prio = REG_IS_DIRTY;
if (nreg->output) {
nreg->extended = nreg->extend;
nreg->zero_extended = nreg->zero_extend;
static void clean_reg(jit_state_t *_jit,
struct native_register *nreg, u8 jit_reg, bool clean)
{
- if (nreg->dirty) {
+ if (nreg->prio == REG_IS_DIRTY) {
s16 offset = offsetof(struct lightrec_state, regs.gpr)
+ (nreg->emulated_register << 2);
jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg);
- nreg->loaded |= nreg->dirty;
- nreg->dirty ^= clean;
+
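+		/* The value was just stored back to memory: when cleaning,
+		 * downgrade the register from dirty to a clean mapping. */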
+ if (clean) {
+ if (nreg->emulated_register == 0)
+ nreg->prio = REG_IS_ZERO;
+ else
+ nreg->prio = REG_IS_LOADED;
+ }
}
}
{
unsigned int i;
- for (i = 0; i < NUM_REGS; i++)
- clean_reg(_jit, &cache->lightrec_regs[i], JIT_V(i), clean);
+ for (i = 0; i < NUM_REGS; i++) {
+ clean_reg(_jit, &cache->lightrec_regs[i],
+ JIT_V(FIRST_REG + i), clean);
+ }
for (i = 0; i < NUM_TEMPS; i++) {
clean_reg(_jit, &cache->lightrec_regs[i + NUM_REGS],
- JIT_R(i), clean);
+ JIT_R(FIRST_TEMP + i), clean);
}
}
clean_regs(cache, _jit, true);
}
+bool lightrec_has_dirty_regs(struct regcache *cache)
+{
+ unsigned int i;
+
+ for (i = 0; i < NUM_REGS + NUM_TEMPS; i++)
+ if (cache->lightrec_regs[i].prio == REG_IS_DIRTY)
+ return true;
+
+ return false;
+}
+
void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg)
{
struct native_register *reg;
for (i = 0; i < NUM_REGS; i++) {
nreg = &cache->lightrec_regs[i];
- if (nreg->used || nreg->loaded || nreg->dirty)
- jit_live(JIT_V(i));
+ if (nreg->used || nreg->prio > REG_IS_TEMP)
+ jit_live(JIT_V(FIRST_REG + i));
}
#endif
for (i = 0; i < NUM_TEMPS; i++) {
nreg = &cache->lightrec_regs[NUM_REGS + i];
- if (nreg->used || nreg->loaded || nreg->dirty)
- jit_live(JIT_R(i));
+ if (nreg->used || nreg->prio > REG_IS_TEMP)
+ jit_live(JIT_R(FIRST_TEMP + i));
}
+
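+	/* Keep the state and cycle counter registers alive until the end of
+	 * the block, so that Lightning's liveness analysis never considers
+	 * them dead. */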
+ jit_live(LIGHTREC_REG_STATE);
+ jit_live(LIGHTREC_REG_CYCLE);
}
#ifndef __REGCACHE_H__
#define __REGCACHE_H__
-#include "lightrec-private.h"
+#include "lightning-wrapper.h"
-#define NUM_REGS (JIT_V_NUM - 2)
-#define NUM_TEMPS (JIT_R_NUM)
+#define NUM_REGS (JIT_V_NUM - 1)
#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1))
-#define LIGHTREC_REG_CYCLE (JIT_V(JIT_V_NUM - 2))
+
+#if defined(__powerpc__)
+# define NUM_TEMPS JIT_R_NUM
+/* JIT_R0 is callee-saved on PowerPC; we have to use something else */
+# define LIGHTREC_REG_CYCLE _R10
+# define FIRST_TEMP 0
+#else
+# define NUM_TEMPS (JIT_R_NUM - 1)
+# define LIGHTREC_REG_CYCLE JIT_R0
+# define FIRST_TEMP 1
+#endif
+
+#include "lightrec-private.h"
+
+#define FIRST_REG 0
/* Flags for lightrec_alloc_reg_in / lightrec_alloc_reg_out. */
#define REG_EXT BIT(0) /* register is sign-extended */
u8 lightrec_request_reg_in(struct regcache *cache, jit_state_t *_jit,
u8 reg, u8 jit_reg);
+s8 lightrec_get_reg_with_value(struct regcache *cache, intptr_t value);
+void lightrec_temp_set_value(struct regcache *cache, u8 jit_reg, intptr_t value);
+
u8 lightrec_get_reg_in_flags(struct regcache *cache, u8 jit_reg);
void lightrec_set_reg_out_flags(struct regcache *cache, u8 jit_reg, u8 flags);
void lightrec_clean_regs(struct regcache *cache, jit_state_t *_jit);
void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg);
void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit);
+_Bool lightrec_has_dirty_regs(struct regcache *cache);
void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit,
u8 reg, _Bool unload);
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
+#include <pthread.h>
#if defined(__hpux) && defined(__hppa__)
# include <machine/param.h>
#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
#endif
+ jit_code_casr, jit_code_casi,
+#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
jit_code_last_code
} jit_code_t;
extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
jit_int32_t, jit_int32_t,
jit_word_t, jit_word_t);
+#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h)
+extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
+ jit_word_t, jit_word_t,
+ jit_int32_t, jit_int32_t);
#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
jit_word_t, jit_word_t, jit_float32_t);
static bool use_lightrec_interpreter;
static bool use_pcsx_interpreter;
-static bool lightrec_debug;
-static bool lightrec_very_debug;
static bool booting;
static u32 lightrec_begin_cycles;
memcpy(cache_buf, psxM, sizeof(cache_buf));
}
+static bool lightrec_can_hw_direct(u32 kaddr, bool is_write, u8 size)
+{
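+	/* Tell the core which hardware register accesses can be turned into
+	 * plain loads/stores. Any port whose reads or writes have side
+	 * effects must keep going through the generic I/O callbacks. */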
+ switch (size) {
+ case 8:
+ switch (kaddr) {
+ case 0x1f801040:
+ case 0x1f801050:
+ case 0x1f801800:
+ case 0x1f801801:
+ case 0x1f801802:
+ case 0x1f801803:
+ return false;
+ default:
+ return true;
+ }
+ case 16:
+ switch (kaddr) {
+ case 0x1f801040:
+ case 0x1f801044:
+ case 0x1f801048:
+ case 0x1f80104a:
+ case 0x1f80104e:
+ case 0x1f801050:
+ case 0x1f801054:
+ case 0x1f80105a:
+ case 0x1f80105e:
+ case 0x1f801100:
+ case 0x1f801104:
+ case 0x1f801108:
+ case 0x1f801110:
+ case 0x1f801114:
+ case 0x1f801118:
+ case 0x1f801120:
+ case 0x1f801124:
+ case 0x1f801128:
+ return false;
+ case 0x1f801070:
+ case 0x1f801074:
+ return !is_write;
+ default:
+ return is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00;
+ }
+ default:
+ switch (kaddr) {
+ case 0x1f801040:
+ case 0x1f801050:
+ case 0x1f801100:
+ case 0x1f801104:
+ case 0x1f801108:
+ case 0x1f801110:
+ case 0x1f801114:
+ case 0x1f801118:
+ case 0x1f801120:
+ case 0x1f801124:
+ case 0x1f801128:
+ case 0x1f801810:
+ case 0x1f801814:
+ case 0x1f801820:
+ case 0x1f801824:
+ return false;
+ case 0x1f801070:
+ case 0x1f801074:
+ case 0x1f801088:
+ case 0x1f801098:
+ case 0x1f8010a8:
+ case 0x1f8010b8:
+ case 0x1f8010c8:
+ case 0x1f8010e8:
+ case 0x1f8010f4:
+ return !is_write;
+ default:
+ return !is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00;
+ }
+ }
+}
+
static const struct lightrec_ops lightrec_ops = {
.cop2_op = cop2_op,
.enable_ram = lightrec_enable_ram,
+ .hw_direct = lightrec_can_hw_direct,
};
static int lightrec_plugin_init(void)
lightrec_map[PSX_MAP_MIRROR1].address = psxM + 0x200000;
lightrec_map[PSX_MAP_MIRROR2].address = psxM + 0x400000;
lightrec_map[PSX_MAP_MIRROR3].address = psxM + 0x600000;
+ lightrec_map[PSX_MAP_HW_REGISTERS].address = psxH + 0x1000;
lightrec_map[PSX_MAP_CODE_BUFFER].address = code_buffer;
}
- lightrec_debug = !!getenv("LIGHTREC_DEBUG");
- lightrec_very_debug = !!getenv("LIGHTREC_VERY_DEBUG");
use_lightrec_interpreter = !!getenv("LIGHTREC_INTERPRETER");
if (getenv("LIGHTREC_BEGIN_CYCLES"))
lightrec_begin_cycles = (unsigned int) strtol(
return 0;
}
-static u32 hash_calculate_le(const void *buffer, u32 count)
-{
- unsigned int i;
- u32 *data = (u32 *) buffer;
- u32 hash = 0xffffffff;
-
- count /= 4;
- for(i = 0; i < count; ++i) {
- hash += LE32TOH(data[i]);
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
- return hash;
-}
-
-static u32 hash_calculate(const void *buffer, u32 count)
-{
- unsigned int i;
- u32 *data = (u32 *) buffer;
- u32 hash = 0xffffffff;
-
- count /= 4;
- for(i = 0; i < count; ++i) {
- hash += data[i];
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
-
- hash += (hash << 3);
- hash ^= (hash >> 11);
- hash += (hash << 15);
- return hash;
-}
-
-static const char * const mips_regs[] = {
- "zero",
- "at",
- "v0", "v1",
- "a0", "a1", "a2", "a3",
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
- "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
- "t8", "t9",
- "k0", "k1",
- "gp", "sp", "fp", "ra",
- "lo", "hi",
-};
-
-static void print_for_big_ass_debugger(void)
-{
- unsigned int i;
-
- printf("CYCLE 0x%08x PC 0x%08x", psxRegs.cycle, psxRegs.pc);
-
- if (lightrec_very_debug)
- printf(" RAM 0x%08x SCRATCH 0x%08x HW 0x%08x",
- hash_calculate_le(psxM, 0x200000),
- hash_calculate_le(psxH, 0x400),
- hash_calculate_le(psxH + 0x1000, 0x2000));
-
- printf(" CP0 0x%08x CP2D 0x%08x CP2C 0x%08x INT 0x%04x INTCYCLE 0x%08x GPU 0x%08x",
- hash_calculate(&psxRegs.CP0.r,
- sizeof(psxRegs.CP0.r)),
- hash_calculate(&psxRegs.CP2D.r,
- sizeof(psxRegs.CP2D.r)),
- hash_calculate(&psxRegs.CP2C.r,
- sizeof(psxRegs.CP2C.r)),
- psxRegs.interrupt,
- hash_calculate(psxRegs.intCycle,
- sizeof(psxRegs.intCycle)),
- LE32TOH(HW_GPU_STATUS));
-
- if (lightrec_very_debug)
- for (i = 0; i < 34; i++)
- printf(" %s 0x%08x", mips_regs[i], psxRegs.GPR.r[i]);
- else
- printf(" GPR 0x%08x", hash_calculate(&psxRegs.GPR.r,
- sizeof(psxRegs.GPR.r)));
- printf("\n");
-}
-
static void lightrec_dump_regs(struct lightrec_state *state)
{
struct lightrec_registers *regs = lightrec_get_registers(state);
gen_interupt();
+ // step during early boot so that 0x80030000 fastboot hack works
+ if (booting)
+ next_interupt = psxRegs.cycle;
+
if (use_pcsx_interpreter) {
intExecuteBlock();
} else {
lightrec_reset_cycle_count(lightrec_state, psxRegs.cycle);
lightrec_restore_regs(lightrec_state);
- if (unlikely(use_lightrec_interpreter))
+ if (unlikely(use_lightrec_interpreter)) {
psxRegs.pc = lightrec_run_interpreter(lightrec_state,
- psxRegs.pc);
- // step during early boot so that 0x80030000 fastboot hack works
- else if (unlikely(booting || lightrec_debug))
- psxRegs.pc = lightrec_execute_one(lightrec_state,
- psxRegs.pc);
- else
+ psxRegs.pc,
+ next_interupt);
+ } else {
psxRegs.pc = lightrec_execute(lightrec_state,
psxRegs.pc, next_interupt);
+ }
psxRegs.cycle = lightrec_current_cycle_count(lightrec_state);
booting = false;
}
- if (lightrec_debug && psxRegs.cycle >= lightrec_begin_cycles
- && psxRegs.pc != old_pc)
- print_for_big_ass_debugger();
-
if ((psxRegs.CP0.n.Cause & psxRegs.CP0.n.Status & 0x300) &&
(psxRegs.CP0.n.Status & 0x1)) {
/* Handle software interrupts */