[subrepo]
remote = https://github.com/pcercuei/gnu_lightning.git
branch = pcsx_rearmed
- commit = b910a469a9bea63056eb53430dea4c7b56e447a8
- parent = 13b02197fcb7575646408094d5583ed7391b1153
+ commit = b1983e9036d35933ffa773d81b61eedbf3ae3b93
+ parent = 638335fabe3ba77b2a5c624a4c4aec52c18488f7
method = merge
cmdver = 0.4.3
+2023-02-23 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h: Add new 'inst' field to
+ jit_compiler_t, if __mips__ is defined. This field is a simple
+ helper for a pending instruction to be emitted, and that can
+ be emitted out of order.
+ * lib/jit_fallback.c: Update for changes in internal mips patching
+ and jumping macros and function calls.
+ * lib/jit_mips-cpu.c: Core of changes to attempt to fill delay
+ slots with instructions that can be emitted out of order.
+ * lib/jit_mips-fpu.c: Update to use delay slot in branches.
+ * lib/jit_mips.c: Update for new delay slot use logic.
+
+2023-02-20 Paulo Andrade <pcpa@gnu.org>
+
+ * check/float.tst: Add conditionals for mips release for expected
+ NaN truncated to an integer.
+ * check/lightning.c: Add extra preprocessor for mips release.
+ * include/lightning/jit_mips.h: Make the NEW_ABI preprocessor
+ defined to zero if using the n32 or n64 abis. This makes it
+ easier to create runtime checks with an always true or false
+ condition.
+ * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c: Implement mips release
+ 6 support.
+ * lib/jit_mips.c: Add more reliable mips release detection code.
+
+2023-02-09 Paulo Andrade <pcpa@gnu.org>
+
+ * check/Makefile.am: Update for new bit.tst test, to check the
+ new clor, clzr, ctor and ctzr instructions.
+ * check/all.tst: Update to verify encoding of new instructions.
+ * check/lightning.c: Update to have the lightning "assembler"
+ understanding the new instructions.
+ * include/lightning.h.in: Define new codes for new instructions.
+ * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+ lib/jit_ia64.c, lib/jit_loongarch.c, lib/jit_mips.c, lib/jit_ppc.c,
+ lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c:
+ Implement fallback version of new instructions.
+ * lib/jit_fallback.c: Actual implementation of the fallbacks of
+ the new instructions.
+ * lib/jit_names.c: Update to print debug information of new
+ instructions.
+
+2023-01-26 Paulo Andrade <pcpa@gnu.org>
+
+ * check/riprel.c, check/riprel.ok: New check files.
+ * check/Makefile.am: Support for new riprel test.
+ * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86.c: Implement
+ %rip relative addressing when reliable. Currently disabled for
+ x32 and _WIN32; could be added for positive relative addresses
+ only where it should work.
+ * lib/lightning.c: Correct problem added in previous patch due
+ to not testing on a 32 bit environment.
+
+2023-01-23 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c: Use pseudo instructions
+ "b" (BEQ(0,0,disp)) and "bal" (BGEZAL(0,disp)) for mips2, when an
+ unconditional branch or function call is known to be in range of a
+ relative jump. This should significantly reduce jit size generation.
+
+2023-01-20 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_rewind.c: Adapt
+ code to implement a variable framesize and optimize frame pointer
+ for simple leaf functions.
+
+2023-01-19 Paulo Andrade <pcpa@gnu.org>
+
+ * lib/jit_riscv.c, lib/jit_riscv-cpu.c: Adapt code to use a
+ variable framesize. Previously it was aligning the stack at
+ 8 bytes, not 16. Now functions are called with a 16 byte aligned
+ stack.
+
+2023-01-18 Paulo Andrade <pcpa@gnu.org>
+
+ * include/lightning/jit_private.h: Include new framesize field
+ of jit_compiler_t; add new alist field for jit_function_t; add
+ new cvt_offset and need_stack fields specific to x86.
+ * lib/jit_x86.c, lib/jit_x86-cpu.c: Rewrite code to create stack
+ frames, so that less stack space can be used if no, or very few
+ callee save registers are modified in a function.
+ * lib/jit_x86-sse.c, lib/jit_x86-x87.c: Make CVT_OFFSET variable, and
+ dynamically allocated; this is required to avoid needing to
+ modify twice %rsp at function prologs, even if no stack space
+ is used.
+
+2022-11-09 Paulo Andrade <pcpa@gnu.org>
+
+ * configure.ac: Add new --enable-devel-strong-type-checking
+ option.
+ * include/lightning.h.in: Rework to not need to know if
+ PACKED_STACK is defined, and add a new argument to _jit_arg,
+ _jit_putarg{r,i}, _jit_pusharg{r,i} and _jit_ret{r,i} to have
+ the same code path if PACKED_STACK is defined or not, and also
+ to implement STRONG_TYPE_CHECK enabled with the new
+ --enable-devel-strong-type-checking.
+ * include/lightning/jit_private.h: Add new macros to add assertions
+ for STRONG_TYPE_CHECK and avoid pasting tokens in jit_inc_synth*
+ when the token is not a static known value.
+ * lib/jit_aarch64.c: The first implementation of the new code,
+ working correctly in Apple M1 and with and without STRONG_TYPE_CHECK
+ in Linux.
+
+2022-11-08 Paulo Andrade <pcpa@gnu.org>
+
+ Add support for packed stack arguments as used by Apple M1
+ aarch64 cpus. This requires a major redesign in how Lightning
+ works, because contrary to all other supported ports, in this
+ case arguments must be truncated and sign/zero extended if
+ passed in registers, but when receiving the argument, there
+ is no need to truncate and sign/zero extend.
+ Return values are also treated this way. The callee must
+ truncate sign/zero extend, not the caller.
+ * check/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS.
+ * check/all.tst: Implement paired arg/getarg/pusharg/putarg/ret
+ codes to validate they do not generate assertions.
+ * check/allocar.tst, check/call.tst, check/fib.tst, check/put.tst,
+ check/stack.tst: Update to pass in all build types.
+ * check/lightning.c: Add new codes for extra codes to handle
+ packed stack.
+ * configure.ac: Add a preprocessor define to know if a packed
+ stack is required. This is not really used, as it was moved to
+ jit_aarch64.h.
+ * doc/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS.
+ * doc/rpn.c: Update to pass in all build types.
+ * include/lightning.h.in: Add new codes and reorder enum.
+ * include/lightning/jit_aarch64.h: Detect condition of needing
+ a packed stack.
+ * lib/jit_aarch64-sz.c: Regenerate.
+ * lib/jit_aarch64.c: Major updates for packed stack.
+ * lib/jit_names.c: Updates for debug output.
+ * lib/lightning.c: Update for new codes.
+
+2022-10-31 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
+
+ Add new skip instruction.
+ * .gitignore: Update from Gnulib.
+ * check/Makefile.am: Add tests.
+ * check/lightning.c: Handle skip instructions.
+ * check/protect.c: Rewrite with skip.
+ * check/skip.ok: New test.
+ * check/skip.tst: New test.
+ * doc/body.texi: Document the skip instruction.
+ * include/lightning.h.in: Add the skip instruction.
+ * lib/jit_aarch64-sz.c: Update for skip instruction.
+ * lib/jit_aarch64.c: Implement skip instruction.
+ * lib/jit_alpha-sz.c: Update for skip instruction.
+ * lib/jit_alpha.c: Implement skip instruction.
+ * lib/jit_arm-sz.c: Update for skip instruction.
+ * lib/jit_arm.c: Implement skip instruction.
+ * lib/jit_hppa-sz.c: Update for skip instruction.
+ * lib/jit_hppa.c: Implement skip instruction.
+ * lib/jit_ia64-sz.c: Update for skip instruction.
+ * lib/jit_ia64.c: Implement skip instruction.
+ * lib/jit_loongarch-sz.c: Update for skip instruction.
+ * lib/jit_loongarch.c: Implement skip instruction.
+ * lib/jit_mips-sz.c: Update for skip instruction.
+ * lib/jit_mips.c: Implement skip instruction.
+ * lib/jit_names.c: Update for skip instruction.
+ * lib/jit_ppc-sz.c: Update for skip instruction.
+ * lib/jit_ppc.c: Implement skip instruction.
+ * lib/jit_riscv-sz.c: Update for skip instruction.
+ * lib/jit_riscv.c: Implement skip instruction.
+ * lib/jit_s390-sz.c: Update for skip instruction.
+ * lib/jit_s390.c: Implement skip instruction.
+ * lib/jit_size.c: Treat align and skip in a special way.
+ * lib/jit_sparc-sz.c: Update for skip instruction.
+ * lib/jit_sparc.c: Implement skip instruction.
+ * lib/jit_x86-sz.c: Update for skip instruction.
+ * lib/jit_x86.c: Implement skip instruction.
+ * lib/lightning.c: Classify skip instruction.
+
+2022-10-30 Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
+
+ Add user-visible functions jit_protect and jit_unprotect.
+ * check/Makefile.am: Add test for jit_protect and jit_unprotect.
+ * check/protect.c: New test.
+ * doc/body.texi: Add documentation for jit_protect and
+ jit_unprotect.
+ * include/lightning.h.in: Add prototypes for jit_protect and
+ jit_unprotect.
+ * include/lightning/jit_private.h: Add a field to store the size
+ of the protected memory.
+ * lib/lightning.c: Remember the size of the protected memory and
+ implement the two new functions.
+
2022-10-12 Paulo Andrade <pcpa@gnu.org>
* include/lightning/jit_loongarch.h, lib/jit_loongarch-cpu.c,
#
-# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
#
# This file is part of GNU lightning.
#
Jon Arintok <jon.arintok@gmail.com>
Bruno Haible <bruno@clisp.org>
Marc Nieper-Wißkirchen <marc@nieper-wisskirchen.de>
+Paul Cercueil <paul@crapouillou.net>
- * Validate that divrem in jit_x86-cpu.c is not modifying
- the non result arguments. This is not verified by clobber.tst,
- as it only checks registers not involved in the operation
- (because it does not know about values being set as input
- for the the operation).
- * Write a simple higher level language implementation generating
- jit with lightning, that could be some lisp or C like language.
-
- * rerun ./configure --enable-devel-get-jit-size and regenerate
- the related jit_$arch-sz.c for the ports where nodata is
- meaningful:
- hppa (done)
- i586 (done)
- ia64
- mips o32 (done)
- mips n32
- mips n64
- powerpc 32 (done)
- powerpc 64 (done)
- ppc
- s390x (done)
- sparc (done)
- x86_64 (done)
- Missing ones are due to no longer (remote) access to such hosts
- and may be broken with jit_set_data(..., JIT_DISABLE_DATA).
- (ia64 hp-ux or linx), (irix mips for 32 or 64 abi), and
- (darwin ppc).
#
-# Copyright 2012-2022 Free Software Foundation, Inc.
+# Copyright 2012-2023 Free Software Foundation, Inc.
#
# This file is part of GNU lightning.
#
# License for more details.
#
-AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+ -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
- catomic
+ catomic protect riprel
lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
lightning_SOURCES = lightning.c
catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB)
catomic_SOURCES = catomic.c
+protect_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+protect_SOURCES = protect.c
+
+riprel_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+riprel_SOURCES = riprel.c
+
$(top_builddir)/lib/liblightning.la:
cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
range.tst range.ok \
ranger.tst ranger.ok \
ret.tst ret.ok \
+ skip.tst skip.ok \
tramp.tst tramp.ok \
va_list.tst va_list.ok \
+ bit.tst bit.ok \
check.sh \
check.x87.sh \
check.arm.sh check.swf.sh \
check.arm4.swf.sh \
check.nodata.sh \
check.x87.nodata.sh \
- run-test all.tst
+ run-test all.tst \
+ collatz.tst factorial.tst
base_TESTS = \
3to2 add align allocai \
clobber carry call \
float jmpr live put \
qalu_mul qalu_div \
- range ranger ret tramp \
- va_list
+ range ranger ret skip tramp \
+ va_list bit
$(base_TESTS): check.sh
$(LN_S) $(srcdir)/check.sh $@
clobber.nodata carry.nodata call.nodata \
float.nodata jmpr.nodata tramp.nodata \
range.nodata ranger.nodata put.nodata \
- va_list.nodata
+ va_list.nodata bit.nodata
$(nodata_TESTS): check.nodata.sh
$(LN_S) $(srcdir)/check.nodata.sh $@
TESTS += $(nodata_TESTS)
endif
-TESTS += ccall self setcode nodata ctramp carg cva_list catomic
+TESTS += ccall self setcode nodata ctramp carg cva_list catomic \
+ protect riprel
CLEANFILES = $(TESTS)
#TESTS_ENVIRONMENT=$(srcdir)/run-test;
.code
prolog
allocai 32 $buf
- arg $c
- arg $uc
- arg $s
- arg $us
- arg $i
+ arg_c $c
+ arg_c $uc
+ arg_s $s
+ arg_s $us
+ arg_i $i
+ arg_i $ui
#if __WORDSIZE == 64
- arg $ui
- arg $l
+ arg_l $l
#endif
+ arg $a
getarg_c %r0 $c
getarg_uc %r0 $uc
getarg_s %r0 $s
getarg_ui %r0 $ui
getarg_l %r0 $l
#endif
+ getarg %r0 $a
+ putargr_c %r0 $c
+ putargi_c 1 $c
+ putargr_uc %r0 $uc
+ putargi_uc 1 $uc
+ putargr_s %r0 $s
+ putargi_s 1 $s
+ putargr_us %r0 $us
+ putargi_us 1 $us
+ putargr_i %r0 $i
+ putargi_i 1 $ui
+#if __WORDSIZE == 64
+ putargr_ui %r0 $ui
+ putargi_ui 1 $ui
+ putargr_l %r0 $l
+ putargi_l 1 $l
+#endif
+ putargr %r0 $a
+ putargi 1 $a
addr %r0 %r1 %r2
addi %r0 %r1 2
addcr %r0 %r1 %r2
rshi_u %r0 %r1 2
negr %r0 %r1
comr %r0 %r1
+ clor %r0 %r1
+ clzr %r0 %r1
+ ctor %r0 %r1
+ ctzr %r0 %r1
ltr %r0 %r1 %r2
lti %r0 %r1 2
ltr_u %r0 %r1 %r2
callr %r0
calli label
prepare
+ pushargr_c %r0
+ pushargr_uc %r0
+ pushargr_s %r0
+ pushargr_us %r0
+ pushargr_i %r0
+#if __WORDSIZE == 64
+ pushargr_ui %r0
+ pushargr_l %r0
+#endif
pushargr %r0
finishr %r0
prepare
ellipsis
finishi 0x80000000
ret
+ retr_c %r1
+ retr_uc %r1
+ retr_s %r1
+ retr_us %r1
+ retr_i %r1
+#if __WORDSIZE == 64
+ retr_ui %r1
+ retr_l %r1
+#endif
retr %r1
reti 2
retval_c %r1
#endif
arg_f $f
getarg_f %f1 $f
+ putargr_f %f1 $f
+ putargi_f 1.0 $f
addr_f %f0 %f1 %f2
addi_f %f0 %f1 0.5
subr_f %f0 %f1 %f2
retval_f %f1
arg_d $f
getarg_d %f1 $f
+ putargr_d %f1 $f
+ putargi_d 1.0 $f
addr_d %f0 %f1 %f2
addi_d %f0 %f1 0.5
subr_d %f0 %f1 %f2
#define fill_us fill_s
#define fill_ui fill_i
-#define ARG( T, N) arg $arg##T##N
+#define ARG( T, N) arg##T $arg##T##N
#define ARGF( T, N) arg##T $arg##T##N
#define ARG1( K, T) ARG##K(T, 0)
#define ARG2( K, T) ARG1( K, T) ARG##K(T, 1)
#define ARG15(K, T) ARG14(K, T) ARG##K(T, 14)
#define ARG16(K, T) ARG15(K, T) ARG##K(T, 15)
#define ARG_c(N) ARG##N( , _c)
-#define ARG_uc(N) ARG##N( , _uc)
+#define ARG_uc(N) ARG##N( , _c)
#define ARG_s(N) ARG##N( , _s)
-#define ARG_us(N) ARG##N( , _us)
+#define ARG_us(N) ARG##N( , _s)
#define ARG_i(N) ARG##N( , _i)
-#define ARG_ui(N) ARG##N( , _ui)
+#define ARG_ui(N) ARG##N( , _i)
#define ARG_l(N) ARG##N( , _l)
#define ARG_f(N) ARG##N(F, _f)
#define ARG_d(N) ARG##N(F, _d)
-#define CHK(N, T, V) \
- getarg %r0 $arg##T##V \
+#define CHK(N, T, TT, V) \
+ getarg##T %r0 $arg##TT##V \
ldxi##T %r1 %v0 $(V * szof##T) \
beqr N##T##V %r0 %r1 \
calli @abort \
N##T##V:
-#define CHKF(N, T, V) \
- getarg##T %f0 $arg##T##V \
+#define CHKF(N, T, TT, V) \
+ getarg##T %f0 $arg##TT##V \
ldxi##T %f1 %v0 $(V * szof##T) \
beqr##T N##T##V %f0 %f1 \
calli @abort \
N##T##V:
-#define GET1( K, N, T, V) CHK##K(N, T, 0)
-#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1)
-#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2)
-#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3)
-#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4)
-#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5)
-#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6)
-#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7)
-#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8)
-#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9)
-#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10)
-#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11)
-#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12)
-#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13)
-#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14)
-#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15)
+#define GET1( K, N, T, TT, V) CHK##K(N, T, TT, 0)
+#define GET2( K, N, T, TT, V) GET1( K, N, T, TT, V) CHK##K(N, T, TT, 1)
+#define GET3( K, N, T, TT, V) GET2( K, N, T, TT, V) CHK##K(N, T, TT, 2)
+#define GET4( K, N, T, TT, V) GET3( K, N, T, TT, V) CHK##K(N, T, TT, 3)
+#define GET5( K, N, T, TT, V) GET4( K, N, T, TT, V) CHK##K(N, T, TT, 4)
+#define GET6( K, N, T, TT, V) GET5( K, N, T, TT, V) CHK##K(N, T, TT, 5)
+#define GET7( K, N, T, TT, V) GET6( K, N, T, TT, V) CHK##K(N, T, TT, 6)
+#define GET8( K, N, T, TT, V) GET7( K, N, T, TT, V) CHK##K(N, T, TT, 7)
+#define GET9( K, N, T, TT, V) GET8( K, N, T, TT, V) CHK##K(N, T, TT, 8)
+#define GET10(K, N, T, TT, V) GET9( K, N, T, TT, V) CHK##K(N, T, TT, 9)
+#define GET11(K, N, T, TT, V) GET10(K, N, T, TT, V) CHK##K(N, T, TT, 10)
+#define GET12(K, N, T, TT, V) GET11(K, N, T, TT, V) CHK##K(N, T, TT, 11)
+#define GET13(K, N, T, TT, V) GET12(K, N, T, TT, V) CHK##K(N, T, TT, 12)
+#define GET14(K, N, T, TT, V) GET13(K, N, T, TT, V) CHK##K(N, T, TT, 13)
+#define GET15(K, N, T, TT, V) GET14(K, N, T, TT, V) CHK##K(N, T, TT, 14)
+#define GET16(K, N, T, TT, V) GET15(K, N, T, TT, V) CHK##K(N, T, TT, 15)
-#define GET_c(N, M) GET##N( , c##N, _c, M)
-#define GET_uc(N, M) GET##N( , uc##N, _uc, M)
-#define GET_s(N, M) GET##N( , s##N, _s, M)
-#define GET_us(N, M) GET##N( , us##N, _us, M)
-#define GET_i(N, M) GET##N( , i##N, _i, M)
-#define GET_ui(N, M) GET##N( , ui##N, _ui, M)
-#define GET_l(N, M) GET##N( , l##N, _l, M)
-#define GET_f(N, M) GET##N(F, f##N, _f, M)
-#define GET_d(N, M) GET##N(F, d##N, _d, M)
+#define GET_c(N, M) GET##N( , c##N, _c, _c, M)
+#define GET_uc(N, M) GET##N( , uc##N, _uc, _c, M)
+#define GET_s(N, M) GET##N( , s##N, _s, _s, M)
+#define GET_us(N, M) GET##N( , us##N, _us, _s, M)
+#define GET_i(N, M) GET##N( , i##N, _i, _i, M)
+#define GET_ui(N, M) GET##N( , ui##N, _ui, _i, M)
+#define GET_l(N, M) GET##N( , l##N, _l, _l, M)
+#define GET_f(N, M) GET##N(F, f##N, _f, _f, M)
+#define GET_d(N, M) GET##N(F, d##N, _d, _d, M)
-#define PUSH( T, V) pushargi V
+#define PUSH( T, V) pushargi##T V
#define PUSHF( T, V) pushargi##T V
#define PUSH0( K, T) /**/
#define PUSH1( K, T) PUSH##K(T, 0)
ret \
epilog
-#define DEFN(N, M, T) \
+#define DEFN(N, M, T, TT) \
name test##T##_##N \
test##T##_##N: \
prolog \
arg $argp \
/* stack buffer in %v0 */ \
getarg %v0 $argp \
- ARG##T(N) \
+ ARG##TT(N) \
/* validate arguments */ \
GET##T(N, M) \
/* heap buffer in %v1 */ \
ret \
epilog
-#define DEF( T) \
+#define DEF( T, TT) \
DEF0( T) \
- DEFN( 1, 0, T) \
- DEFN( 2, 1, T) \
- DEFN( 3, 2, T) \
- DEFN( 4, 3, T) \
- DEFN( 5, 4, T) \
- DEFN( 6, 5, T) \
- DEFN( 7, 6, T) \
- DEFN( 8, 7, T) \
- DEFN( 9, 8, T) \
- DEFN(10, 9, T) \
- DEFN(11, 10, T) \
- DEFN(12, 11, T) \
- DEFN(13, 12, T) \
- DEFN(14, 13, T) \
- DEFN(15, 14, T) \
- DEFN(16, 15, T) \
+ DEFN( 1, 0, T, TT) \
+ DEFN( 2, 1, T, TT) \
+ DEFN( 3, 2, T, TT) \
+ DEFN( 4, 3, T, TT) \
+ DEFN( 5, 4, T, TT) \
+ DEFN( 6, 5, T, TT) \
+ DEFN( 7, 6, T, TT) \
+ DEFN( 8, 7, T, TT) \
+ DEFN( 9, 8, T, TT) \
+ DEFN(10, 9, T, TT) \
+ DEFN(11, 10, T, TT) \
+ DEFN(12, 11, T, TT) \
+ DEFN(13, 12, T, TT) \
+ DEFN(14, 13, T, TT) \
+ DEFN(15, 14, T, TT) \
+ DEFN(16, 15, T, TT) \
DEFX(T)
#define CALL(T) calli test##T##_17
FILLF(_f)
FILLF(_d)
- DEF(_c)
- DEF(_uc)
- DEF(_s)
- DEF(_us)
- DEF(_i)
+ DEF(_c, _c)
+ DEF(_uc, _c)
+ DEF(_s, _s)
+ DEF(_us, _s)
+ DEF(_i, _i)
#if __WORDSIZE == 64
- DEF(_ui)
- DEF(_l)
+ DEF(_ui, _i)
+ DEF(_l, _l)
#endif
- DEF(_f)
- DEF(_d)
+ DEF(_f, _f)
+ DEF(_d, _d)
name main
main:
--- /dev/null
+/* If the fallback clor, clzr, ctor and ctzr are used, it might be better
+ * to implement it as functions, as inlined it is almost as large as a
+ * function.
+ * Below is an example of how to do it.
+ */
+
+.data 4096
+str_clo:
+.c "clo"
+str_clz:
+.c "clz"
+str_cto:
+.c "cto"
+str_ctz:
+.c "ctz"
+print_fmt:
+#if __WORDSIZE == 64
+.c "%s (0x%016lx) %s = %d\n"
+#else
+.c "%s (0x%08lx) %s = %d\n"
+#endif
+ok:
+.c "ok\n"
+
+/* BIT2: load ARG into R1, apply the two register instruction OP##r with
+ * R0 as destination and R1 as source, and call abort unless R0 == RES.
+ * The label pasted from OP, R0, R1 and ARG is the "test passed" target,
+ * so every (OP, R0, R1, ARG) combination must be used only once. */
+#define BIT2(OP, ARG, RES, R0, R1) \
+ movi %R1 ARG \
+ OP##r %R0 %R1 \
+ beqi OP##R0##R1##ARG %R0 RES \
+ calli @abort \
+OP##R0##R1##ARG:
+
+/* BIT1: run BIT2 with V0 as the destination and each of the six
+ * registers (V0 itself included) as the source. */
+#define BIT1(OP, ARG, RES, V0, V1, V2, R0, R1, R2) \
+ BIT2(OP, ARG, RES, V0, V0) \
+ BIT2(OP, ARG, RES, V0, V1) \
+ BIT2(OP, ARG, RES, V0, V2) \
+ BIT2(OP, ARG, RES, V0, R0) \
+ BIT2(OP, ARG, RES, V0, R1) \
+ BIT2(OP, ARG, RES, V0, R2)
+
+/* BIT: repeat BIT1 for five rotations of the register list, using
+ * V1, V2, R0, R1 and R2 in turn as the destination.
+ * NOTE(review): the unrotated order is not listed, so the register
+ * passed as V0 is never used as a destination -- confirm this is
+ * intended. */
+#define BIT(OP, ARG, RES, V0, V1, V2, R0, R1, R2) \
+ BIT1(OP, ARG, RES, V1, V2, R0, R1, R2, V0) \
+ BIT1(OP, ARG, RES, V2, R0, R1, R2, V0, V1) \
+ BIT1(OP, ARG, RES, R0, R1, R2, V0, V1, V2) \
+ BIT1(OP, ARG, RES, R1, R2, V0, V1, V2, R0) \
+ BIT1(OP, ARG, RES, R2, V0, V1, V2, R0, R1)
+
+/* Per instruction front ends: check that clor, clzr, ctor or ctzr of
+ * ARG yields RES, over the v0-v2 and r0-r2 register combinations
+ * generated by BIT. */
+#define CLO(ARG, RES) \
+ BIT(clo, ARG, RES, v0, v1, v2, r0, r1, r2)
+#define CLZ(ARG, RES) \
+ BIT(clz, ARG, RES, v0, v1, v2, r0, r1, r2)
+#define CTO(ARG, RES) \
+ BIT(cto, ARG, RES, v0, v1, v2, r0, r1, r2)
+#define CTZ(ARG, RES) \
+ BIT(ctz, ARG, RES, v0, v1, v2, r0, r1, r2)
+
+.code
+ jmpi main
+/*
+ jit_uword_t cto(jit_uword_t r0) {
+ r0 = ~r0;
+ if (r0 == 0)
+ r0 = __WORDSIZE;
+ else
+ r0 = ctz(r0);
+ return r0;
+ }
+ */
+/* cto: count trailing ones of the single word argument.
+ * Complements the argument and either returns __WORDSIZE (argument was
+ * all ones) or returns the result of calling ctz on the complement. */
+name cto
+cto:
+ prolog
+ arg $in
+ getarg %r0 $in
+ /* r0 = ~r0: trailing ones become trailing zeros */
+ comr %r0 %r0
+ bnei do_cto %r0 0
+ /* complement is zero: every bit of the argument was set */
+ movi %r0 __WORDSIZE
+ jmpi done_cto
+do_cto:
+ /* r0 = ctz(r0) */
+ prepare
+ pushargr %r0
+ finishi ctz
+ retval %r0
+done_cto:
+ retr %r0
+ epilog
+
+/*
+ jit_uword_t clo(jit_uword_t r0) {
+ r0 = ~r0;
+ if (r0 == 0)
+ r0 = __WORDSIZE;
+ else
+ r0 = clz(r0);
+ return r0;
+ }
+ */
+/* clo: count leading ones of the single word argument.
+ * Complements the argument and either returns __WORDSIZE (argument was
+ * all ones) or returns the result of calling clz on the complement. */
+name clo
+clo:
+ prolog
+ arg $in
+ getarg %r0 $in
+ /* r0 = ~r0: leading ones become leading zeros */
+ comr %r0 %r0
+ bnei do_clo %r0 0
+ /* complement is zero: every bit of the argument was set */
+ movi %r0 __WORDSIZE
+ jmpi done_clo
+do_clo:
+ /* r0 = clz(r0) */
+ prepare
+ pushargr %r0
+ finishi clz
+ retval %r0
+done_clo:
+ retr %r0
+ epilog
+
+/*
+ jit_uword_t clz(jit_uword_t r1) {
+ jit_uword_t r0, r2;
+ if (r1 == 0)
+ r0 = __WORDSIZE;
+ else {
+ r0 = 0;
+ #if __WORDSIZE == 64
+ r2 = 0xffffffff00000000UL;
+ if (!(r1 & r2)) {
+ r1 <<= 32;
+ r0 += 32;
+ }
+ r2 <<= 16;
+ #else
+ r2 = 0xffff0000UL;
+ #endif
+ if (!(r1 & r2)) {
+ r1 <<= 16;
+ r0 += 16;
+ }
+ r2 <<= 8;
+ if (!(r1 & r2)) {
+ r1 <<= 8;
+ r0 += 8;
+ }
+ r2 <<= 4;
+ if (!(r1 & r2)) {
+ r1 <<= 4;
+ r0 += 4;
+ }
+ r2 <<= 2;
+ if (!(r1 & r2)) {
+ r1 <<= 2;
+ r0 += 2;
+ }
+ r2 <<= 1;
+ if (!(r1 & r2))
+ r0 += 1;
+ }
+ return r0;
+ }
+ */
+/* clz: count leading zeros of the single word argument.
+ * Returns __WORDSIZE for a zero argument. Otherwise r0 accumulates the
+ * count while r2 holds a mask of the topmost 32 (64 bit only), 16, 8,
+ * 4, 2 and finally 1 bits; whenever the masked bits of r1 are all
+ * clear, r1 is shifted left by the mask width and that width is added
+ * to r0. */
+name clz
+clz:
+ prolog
+ arg $in
+ getarg %r1 $in
+ bnei lun %r1 0
+ reti __WORDSIZE
+lun:
+ /* r0 is the running count; must start at zero for every word size */
+ movi %r0 0
+#if __WORDSIZE == 64
+ movi %r2 0xffffffff00000000
+ /* each bmsr skips the shift/add pair when (r1 & r2) is not zero */
+ bmsr l32 %r1 %r2
+ lshi %r1 %r1 32
+ addi %r0 %r0 32
+l32:
+ /* narrow the mask to the top 16 bits */
+ lshi %r2 %r2 16
+#else
+ movi %r2 0xffff0000
+#endif
+ bmsr l16 %r1 %r2
+ lshi %r1 %r1 16
+ addi %r0 %r0 16
+l16:
+ lshi %r2 %r2 8
+ bmsr l8 %r1 %r2
+ lshi %r1 %r1 8
+ addi %r0 %r0 8
+l8:
+ lshi %r2 %r2 4
+ bmsr l4 %r1 %r2
+ lshi %r1 %r1 4
+ addi %r0 %r0 4
+l4:
+ lshi %r2 %r2 2
+ bmsr l2 %r1 %r2
+ lshi %r1 %r1 2
+ addi %r0 %r0 2
+l2:
+ lshi %r2 %r2 1
+ /* last step only tests the top bit; r1 is not used afterwards */
+ bmsr l1 %r1 %r2
+ addi %r0 %r0 1
+l1:
+ retr %r0
+ epilog
+
+/*
+ jit_uword_t ctz(jit_uword_t r1) {
+ jit_uword_t r0, r2;
+ if (r1 == 0)
+ r0 = __WORDSIZE;
+ else {
+ r0 = 0;
+ #if __WORDSIZE == 64
+ r2 = 0xffffffffUL;
+ if (!(r1 & r2)) {
+ r1 >>= 32;
+ r0 += 32;
+ }
+ r2 >>= 16;
+ #else
+ r2 = 0xffffUL;
+ #endif
+ if (!(r1 & r2)) {
+ r1 >>= 16;
+ r0 += 16;
+ }
+ r2 >>= 8;
+ if (!(r1 & r2)) {
+ r1 >>= 8;
+ r0 += 8;
+ }
+ r2 >>= 4;
+ if (!(r1 & r2)) {
+ r1 >>= 4;
+ r0 += 4;
+ }
+ r2 >>= 2;
+ if (!(r1 & r2)) {
+ r1 >>= 2;
+ r0 += 2;
+ }
+ r2 >>= 1;
+ if (!(r1 & r2))
+ r0 += 1;
+ }
+ return r0;
+ }
+*/
+/* ctz: count trailing zeros of the single word argument.
+ * Returns __WORDSIZE for a zero argument. Otherwise r0 accumulates the
+ * count while r2 holds a mask of the lowest 32 (64 bit only), 16, 8,
+ * 4, 2 and finally 1 bits; whenever the masked bits of r1 are all
+ * clear, r1 is shifted right (unsigned) by the mask width and that
+ * width is added to r0. */
+name ctz
+ctz:
+ prolog
+ arg $in
+ getarg %r1 $in
+ bnei tun %r1 0
+ reti __WORDSIZE
+tun:
+ /* r0 is the running count. It must be zeroed for every word size:
+  * the 32 bit path also accumulates into it with addi. */
+ movi %r0 0
+#if __WORDSIZE == 64
+ movi %r2 0xffffffff
+ /* each bmsr skips the shift/add pair when (r1 & r2) is not zero */
+ bmsr t32 %r1 %r2
+ rshi_u %r1 %r1 32
+ addi %r0 %r0 32
+t32:
+ /* narrow the mask to the low 16 bits */
+ rshi %r2 %r2 16
+#else
+ movi %r2 0xffff
+#endif
+ bmsr t16 %r1 %r2
+ rshi_u %r1 %r1 16
+ addi %r0 %r0 16
+t16:
+ rshi %r2 %r2 8
+ bmsr t8 %r1 %r2
+ rshi_u %r1 %r1 8
+ addi %r0 %r0 8
+t8:
+ rshi %r2 %r2 4
+ bmsr t4 %r1 %r2
+ rshi_u %r1 %r1 4
+ addi %r0 %r0 4
+t4:
+ rshi %r2 %r2 2
+ bmsr t2 %r1 %r2
+ rshi_u %r1 %r1 2
+ addi %r0 %r0 2
+t2:
+ rshi %r2 %r2 1
+ /* last step only tests the low bit; r1 is not used afterwards */
+ bmsr t1 %r1 %r2
+ addi %r0 %r0 1
+t1:
+ retr %r0
+ epilog
+
+/*
+ char *bitsprint(char *v0, jit_uword_t v1) {
+ jit_uword_t r0, r1;
+ memset(v0, '0', __WORDSIZE);
+ v0[__WORDSIZE] = 0;
+ for (r0 = 1L << (__WORDSIZE - 1), r1 = 0; r0; r0 >>= 1, ++r1) {
+ if (v1 & r0)
+ v0[r1] = '1';
+ }
+ return v0;
+ }
+ */
+/* bitsprint: write the binary representation of a word into a buffer.
+ * Arguments: $buf, the destination buffer, which receives __WORDSIZE
+ * digits plus a terminating zero byte, and $val, the word to print.
+ * The digit for the most significant bit is stored first.
+ * Returns the buffer pointer. */
+name bitsprint
+bitsprint:
+ prolog
+ arg $buf
+ arg $val
+ getarg %v0 $buf
+ getarg %v1 $val
+ /* memset(buf, '0', __WORDSIZE) */
+ prepare
+ pushargr %v0
+ pushargi '0'
+ pushargi __WORDSIZE
+ finishi @memset
+ /* buf[__WORDSIZE] = 0 */
+ movi %r0 0
+ addi %r1 %v0 __WORDSIZE
+ str_c %r1 %r0
+ /* r0 = single bit mask starting at the top bit, r1 = buffer index,
+  * r2 = the '1' character to store */
+ movi %r0 $(1 << (__WORDSIZE - 1))
+ movi %r1 0
+ movi %r2 '1'
+bitloop:
+ /* skip the store when (val & mask) is zero; the '0' is already there */
+ bmcr bitzero %v1 %r0
+ stxr_c %r1 %v0 %r2
+bitzero:
+ addi %r1 %r1 1
+ rshi_u %r0 %r0 1
+ /* loop ends once the mask bit has been shifted out */
+ bnei bitloop %r0 0
+ retr %v0
+ epilog
+
+/*
+ #if 0
+ int main(int argc, char *argv[]) {
+ jit_uword_t r0, v0, v1, v2;
+ char buf[80];
+ #if __WORDSIZE == 64
+ char *fmt = "%s (0x%016lx) %s = %d\n";
+ v0 = 0x8000000000000000UL;
+ v2 = 0xffffffffffffffffUL;
+ #else
+ char *fmt = "%s (0x%08lx) %s = %d\n";
+ v0 = 0x80000000UL;
+ v2 = 0xffffffffUL;
+ #endif
+ do {
+ v1 = v0 - 1;
+ r0 = clz(v0);
+ bitsprint(buf, v0);
+ printf(fmt, "clz", v0, buf, r0);
+ r0 = clo(v2);
+ bitsprint(buf, v2);
+ printf(fmt, "clo", v2, buf, r0);
+ r0 = ctz(v0);
+ bitsprint(buf, v0);
+ printf(fmt, "ctz", v0, buf, r0);
+ r0 = cto(v1);
+ bitsprint(buf, v1);
+ printf(fmt, "cto", v1, buf, r0);
+ v0 >>= 1;
+ v2 <<= 1;
+ } while ((jit_word_t)v1 > -1);
+ return 0;
+ }
+ #endif
+ */
+
+/* Make it "#if 1" for a "debug mode", that helps in regenerating tables,
+ * or temporary state while implementing optimized port specific versions. */
+#if 0
+#define CALL_FUNC 1
+/* Debug mode main: walks a single bit (v0) from the top bit down to
+ * zero and a mask (v2) from all ones shifting left, printing for each
+ * step the clz and ctz of v0, the clo of v2 and the cto of v1 = v0 - 1,
+ * together with the binary form produced by bitsprint.
+ * With CALL_FUNC set the clz/clo/ctz/cto functions defined in this
+ * file are called; otherwise the clzr/clor/ctzr/ctor instructions are
+ * used. */
+ name main
+main:
+ prolog
+ allocai 80 $buf
+#if __WORDSIZE == 64
+ movi %v0 0x8000000000000000
+ movi %v2 0xffffffffffffffff
+#else
+ movi %v0 0x80000000
+ movi %v2 0xffffffff
+#endif
+loop:
+ /* v1 = v0 - 1: all bits below the single bit in v0 are set */
+ subi %v1 %v0 1
+ /* clz(v0) */
+ addi %r1 %fp $buf
+ prepare
+ pushargr %r1
+ pushargr %v0
+ finishi bitsprint
+#if CALL_FUNC
+ prepare
+ pushargr %v0
+ finishi clz
+ retval %r0
+#else
+ clzr %r0 %v0
+#endif
+ /* r1 is recomputed because it was set before the calls above */
+ addi %r1 %fp $buf
+ prepare
+ pushargi print_fmt
+ ellipsis
+ pushargi str_clz
+ pushargr %v0
+ pushargr %r1
+ pushargr %r0
+ finishi @printf
+ /* clo(v2) */
+ addi %r1 %fp $buf
+ prepare
+ pushargr %r1
+ pushargr %v2
+ finishi bitsprint
+#if CALL_FUNC
+ prepare
+ pushargr %v2
+ finishi clo
+ retval %r0
+#else
+ clor %r0 %v2
+#endif
+ addi %r1 %fp $buf
+ prepare
+ pushargi print_fmt
+ ellipsis
+ pushargi str_clo
+ pushargr %v2
+ pushargr %r1
+ pushargr %r0
+ finishi @printf
+ /* ctz(v0) */
+ addi %r1 %fp $buf
+ prepare
+ pushargr %r1
+ pushargr %v0
+ finishi bitsprint
+#if CALL_FUNC
+ prepare
+ pushargr %v0
+ finishi ctz
+ retval %r0
+#else
+ ctzr %r0 %v0
+#endif
+ addi %r1 %fp $buf
+ prepare
+ pushargi print_fmt
+ ellipsis
+ pushargi str_ctz
+ pushargr %v0
+ pushargr %r1
+ pushargr %r0
+ finishi @printf
+ /* cto(v1) */
+ addi %r1 %fp $buf
+ prepare
+ pushargr %r1
+ pushargr %v1
+ finishi bitsprint
+#if CALL_FUNC
+ prepare
+ pushargr %v1
+ finishi cto
+ retval %r0
+#else
+ ctor %r0 %v1
+#endif
+ addi %r1 %fp $buf
+ prepare
+ pushargi print_fmt
+ ellipsis
+ pushargi str_cto
+ pushargr %v1
+ pushargr %r1
+ pushargr %r0
+ finishi @printf
+ /* advance to the next bit; stop after the pass where v0 was zero,
+  * that is, when v1 = v0 - 1 became -1 */
+ rshi_u %v0 %v0 1
+ lshi %v2 %v2 1
+ bgti loop %v1 -1
+ ret
+ epilog
+#else
+
+ name main
+main:
+ prolog
+#if __WORDSIZE == 32
+ CLZ(0x80000000, 0)
+ CLO(0xffffffff, 32)
+ CTZ(0x80000000, 31)
+ CTO(0x7fffffff, 31)
+ CLZ(0x40000000, 1)
+ CLO(0xfffffffe, 31)
+ CTZ(0x40000000, 30)
+ CTO(0x3fffffff, 30)
+ CLZ(0x20000000, 2)
+ CLO(0xfffffffc, 30)
+ CTZ(0x20000000, 29)
+ CTO(0x1fffffff, 29)
+ CLZ(0x10000000, 3)
+ CLO(0xfffffff8, 29)
+ CTZ(0x10000000, 28)
+ CTO(0x0fffffff, 28)
+ CLZ(0x08000000, 4)
+ CLO(0xfffffff0, 28)
+ CTZ(0x08000000, 27)
+ CTO(0x07ffffff, 27)
+ CLZ(0x04000000, 5)
+ CLO(0xffffffe0, 27)
+ CTZ(0x04000000, 26)
+ CTO(0x03ffffff, 26)
+ CLZ(0x02000000, 6)
+ CLO(0xffffffc0, 26)
+ CTZ(0x02000000, 25)
+ CTO(0x01ffffff, 25)
+ CLZ(0x01000000, 7)
+ CLO(0xffffff80, 25)
+ CTZ(0x01000000, 24)
+ CTO(0x00ffffff, 24)
+ CLZ(0x00800000, 8)
+ CLO(0xffffff00, 24)
+ CTZ(0x00800000, 23)
+ CTO(0x007fffff, 23)
+ CLZ(0x00400000, 9)
+ CLO(0xfffffe00, 23)
+ CTZ(0x00400000, 22)
+ CTO(0x003fffff, 22)
+ CLZ(0x00200000, 10)
+ CLO(0xfffffc00, 22)
+ CTZ(0x00200000, 21)
+ CTO(0x001fffff, 21)
+ CLZ(0x00100000, 11)
+ CLO(0xfffff800, 21)
+ CTZ(0x00100000, 20)
+ CTO(0x000fffff, 20)
+ CLZ(0x00080000, 12)
+ CLO(0xfffff000, 20)
+ CTZ(0x00080000, 19)
+ CTO(0x0007ffff, 19)
+ CLZ(0x00040000, 13)
+ CLO(0xffffe000, 19)
+ CTZ(0x00040000, 18)
+ CTO(0x0003ffff, 18)
+ CLZ(0x00020000, 14)
+ CLO(0xffffc000, 18)
+ CTZ(0x00020000, 17)
+ CTO(0x0001ffff, 17)
+ CLZ(0x00010000, 15)
+ CLO(0xffff8000, 17)
+ CTZ(0x00010000, 16)
+ CTO(0x0000ffff, 16)
+ CLZ(0x00008000, 16)
+ CLO(0xffff0000, 16)
+ CTZ(0x00008000, 15)
+ CTO(0x00007fff, 15)
+ CLZ(0x00004000, 17)
+ CLO(0xfffe0000, 15)
+ CTZ(0x00004000, 14)
+ CTO(0x00003fff, 14)
+ CLZ(0x00002000, 18)
+ CLO(0xfffc0000, 14)
+ CTZ(0x00002000, 13)
+ CTO(0x00001fff, 13)
+ CLZ(0x00001000, 19)
+ CLO(0xfff80000, 13)
+ CTZ(0x00001000, 12)
+ CTO(0x00000fff, 12)
+ CLZ(0x00000800, 20)
+ CLO(0xfff00000, 12)
+ CTZ(0x00000800, 11)
+ CTO(0x000007ff, 11)
+ CLZ(0x00000400, 21)
+ CLO(0xffe00000, 11)
+ CTZ(0x00000400, 10)
+ CTO(0x000003ff, 10)
+ CLZ(0x00000200, 22)
+ CLO(0xffc00000, 10)
+ CTZ(0x00000200, 9)
+ CTO(0x000001ff, 9)
+ CLZ(0x00000100, 23)
+ CLO(0xff800000, 9)
+ CTZ(0x00000100, 8)
+ CTO(0x000000ff, 8)
+ CLZ(0x00000080, 24)
+ CLO(0xff000000, 8)
+ CTZ(0x00000080, 7)
+ CTO(0x0000007f, 7)
+ CLZ(0x00000040, 25)
+ CLO(0xfe000000, 7)
+ CTZ(0x00000040, 6)
+ CTO(0x0000003f, 6)
+ CLZ(0x00000020, 26)
+ CLO(0xfc000000, 6)
+ CTZ(0x00000020, 5)
+ CTO(0x0000001f, 5)
+ CLZ(0x00000010, 27)
+ CLO(0xf8000000, 5)
+ CTZ(0x00000010, 4)
+ CTO(0x0000000f, 4)
+ CLZ(0x00000008, 28)
+ CLO(0xf0000000, 4)
+ CTZ(0x00000008, 3)
+ CTO(0x00000007, 3)
+ CLZ(0x00000004, 29)
+ CLO(0xe0000000, 3)
+ CTZ(0x00000004, 2)
+ CTO(0x00000003, 2)
+ CLZ(0x00000002, 30)
+ CLO(0xc0000000, 2)
+ CTZ(0x00000002, 1)
+ CTO(0x00000001, 1)
+ CLZ(0x00000001, 31)
+ CLO(0x80000000, 1)
+ CTZ(0x00000001, 0)
+ CTO(0x00000000, 0)
+ CLZ(0x00000000, 32)
+ CLO(0x00000000, 0)
+ CTZ(0x00000000, 32)
+ CTO(0xffffffff, 32)
+#else
+ CLZ(0x8000000000000000, 0)
+ CLO(0xffffffffffffffff, 64)
+ CTZ(0x8000000000000000, 63)
+ CTO(0x7fffffffffffffff, 63)
+ CLZ(0x4000000000000000, 1)
+ CLO(0xfffffffffffffffe, 63)
+ CTZ(0x4000000000000000, 62)
+ CTO(0x3fffffffffffffff, 62)
+ CLZ(0x2000000000000000, 2)
+ CLO(0xfffffffffffffffc, 62)
+ CTZ(0x2000000000000000, 61)
+ CTO(0x1fffffffffffffff, 61)
+ CLZ(0x1000000000000000, 3)
+ CLO(0xfffffffffffffff8, 61)
+ CTZ(0x1000000000000000, 60)
+ CTO(0x0fffffffffffffff, 60)
+ CLZ(0x0800000000000000, 4)
+ CLO(0xfffffffffffffff0, 60)
+ CTZ(0x0800000000000000, 59)
+ CTO(0x07ffffffffffffff, 59)
+ CLZ(0x0400000000000000, 5)
+ CLO(0xffffffffffffffe0, 59)
+ CTZ(0x0400000000000000, 58)
+ CTO(0x03ffffffffffffff, 58)
+ CLZ(0x0200000000000000, 6)
+ CLO(0xffffffffffffffc0, 58)
+ CTZ(0x0200000000000000, 57)
+ CTO(0x01ffffffffffffff, 57)
+ CLZ(0x0100000000000000, 7)
+ CLO(0xffffffffffffff80, 57)
+ CTZ(0x0100000000000000, 56)
+ CTO(0x00ffffffffffffff, 56)
+ CLZ(0x0080000000000000, 8)
+ CLO(0xffffffffffffff00, 56)
+ CTZ(0x0080000000000000, 55)
+ CTO(0x007fffffffffffff, 55)
+ CLZ(0x0040000000000000, 9)
+ CLO(0xfffffffffffffe00, 55)
+ CTZ(0x0040000000000000, 54)
+ CTO(0x003fffffffffffff, 54)
+ CLZ(0x0020000000000000, 10)
+ CLO(0xfffffffffffffc00, 54)
+ CTZ(0x0020000000000000, 53)
+ CTO(0x001fffffffffffff, 53)
+ CLZ(0x0010000000000000, 11)
+ CLO(0xfffffffffffff800, 53)
+ CTZ(0x0010000000000000, 52)
+ CTO(0x000fffffffffffff, 52)
+ CLZ(0x0008000000000000, 12)
+ CLO(0xfffffffffffff000, 52)
+ CTZ(0x0008000000000000, 51)
+ CTO(0x0007ffffffffffff, 51)
+ CLZ(0x0004000000000000, 13)
+ CLO(0xffffffffffffe000, 51)
+ CTZ(0x0004000000000000, 50)
+ CTO(0x0003ffffffffffff, 50)
+ CLZ(0x0002000000000000, 14)
+ CLO(0xffffffffffffc000, 50)
+ CTZ(0x0002000000000000, 49)
+ CTO(0x0001ffffffffffff, 49)
+ CLZ(0x0001000000000000, 15)
+ CLO(0xffffffffffff8000, 49)
+ CTZ(0x0001000000000000, 48)
+ CTO(0x0000ffffffffffff, 48)
+ CLZ(0x0000800000000000, 16)
+ CLO(0xffffffffffff0000, 48)
+ CTZ(0x0000800000000000, 47)
+ CTO(0x00007fffffffffff, 47)
+ CLZ(0x0000400000000000, 17)
+ CLO(0xfffffffffffe0000, 47)
+ CTZ(0x0000400000000000, 46)
+ CTO(0x00003fffffffffff, 46)
+ CLZ(0x0000200000000000, 18)
+ CLO(0xfffffffffffc0000, 46)
+ CTZ(0x0000200000000000, 45)
+ CTO(0x00001fffffffffff, 45)
+ CLZ(0x0000100000000000, 19)
+ CLO(0xfffffffffff80000, 45)
+ CTZ(0x0000100000000000, 44)
+ CTO(0x00000fffffffffff, 44)
+ CLZ(0x0000080000000000, 20)
+ CLO(0xfffffffffff00000, 44)
+ CTZ(0x0000080000000000, 43)
+ CTO(0x000007ffffffffff, 43)
+ CLZ(0x0000040000000000, 21)
+ CLO(0xffffffffffe00000, 43)
+ CTZ(0x0000040000000000, 42)
+ CTO(0x000003ffffffffff, 42)
+ CLZ(0x0000020000000000, 22)
+ CLO(0xffffffffffc00000, 42)
+ CTZ(0x0000020000000000, 41)
+ CTO(0x000001ffffffffff, 41)
+ CLZ(0x0000010000000000, 23)
+ CLO(0xffffffffff800000, 41)
+ CTZ(0x0000010000000000, 40)
+ CTO(0x000000ffffffffff, 40)
+ CLZ(0x0000008000000000, 24)
+ CLO(0xffffffffff000000, 40)
+ CTZ(0x0000008000000000, 39)
+ CTO(0x0000007fffffffff, 39)
+ CLZ(0x0000004000000000, 25)
+ CLO(0xfffffffffe000000, 39)
+ CTZ(0x0000004000000000, 38)
+ CTO(0x0000003fffffffff, 38)
+ CLZ(0x0000002000000000, 26)
+ CLO(0xfffffffffc000000, 38)
+ CTZ(0x0000002000000000, 37)
+ CTO(0x0000001fffffffff, 37)
+ CLZ(0x0000001000000000, 27)
+ CLO(0xfffffffff8000000, 37)
+ CTZ(0x0000001000000000, 36)
+ CTO(0x0000000fffffffff, 36)
+ CLZ(0x0000000800000000, 28)
+ CLO(0xfffffffff0000000, 36)
+ CTZ(0x0000000800000000, 35)
+ CTO(0x00000007ffffffff, 35)
+ CLZ(0x0000000400000000, 29)
+ CLO(0xffffffffe0000000, 35)
+ CTZ(0x0000000400000000, 34)
+ CTO(0x00000003ffffffff, 34)
+ CLZ(0x0000000200000000, 30)
+ CLO(0xffffffffc0000000, 34)
+ CTZ(0x0000000200000000, 33)
+ CTO(0x00000001ffffffff, 33)
+ CLZ(0x0000000100000000, 31)
+ CLO(0xffffffff80000000, 33)
+ CTZ(0x0000000100000000, 32)
+ CTO(0x00000000ffffffff, 32)
+ CLZ(0x0000000080000000, 32)
+ CLO(0xffffffff00000000, 32)
+ CTZ(0x0000000080000000, 31)
+ CTO(0x000000007fffffff, 31)
+ CLZ(0x0000000040000000, 33)
+ CLO(0xfffffffe00000000, 31)
+ CTZ(0x0000000040000000, 30)
+ CTO(0x000000003fffffff, 30)
+ CLZ(0x0000000020000000, 34)
+ CLO(0xfffffffc00000000, 30)
+ CTZ(0x0000000020000000, 29)
+ CTO(0x000000001fffffff, 29)
+ CLZ(0x0000000010000000, 35)
+ CLO(0xfffffff800000000, 29)
+ CTZ(0x0000000010000000, 28)
+ CTO(0x000000000fffffff, 28)
+ CLZ(0x0000000008000000, 36)
+ CLO(0xfffffff000000000, 28)
+ CTZ(0x0000000008000000, 27)
+ CTO(0x0000000007ffffff, 27)
+ CLZ(0x0000000004000000, 37)
+ CLO(0xffffffe000000000, 27)
+ CTZ(0x0000000004000000, 26)
+ CTO(0x0000000003ffffff, 26)
+ CLZ(0x0000000002000000, 38)
+ CLO(0xffffffc000000000, 26)
+ CTZ(0x0000000002000000, 25)
+ CTO(0x0000000001ffffff, 25)
+ CLZ(0x0000000001000000, 39)
+ CLO(0xffffff8000000000, 25)
+ CTZ(0x0000000001000000, 24)
+ CTO(0x0000000000ffffff, 24)
+ CLZ(0x0000000000800000, 40)
+ CLO(0xffffff0000000000, 24)
+ CTZ(0x0000000000800000, 23)
+ CTO(0x00000000007fffff, 23)
+ CLZ(0x0000000000400000, 41)
+ CLO(0xfffffe0000000000, 23)
+ CTZ(0x0000000000400000, 22)
+ CTO(0x00000000003fffff, 22)
+ CLZ(0x0000000000200000, 42)
+ CLO(0xfffffc0000000000, 22)
+ CTZ(0x0000000000200000, 21)
+ CTO(0x00000000001fffff, 21)
+ CLZ(0x0000000000100000, 43)
+ CLO(0xfffff80000000000, 21)
+ CTZ(0x0000000000100000, 20)
+ CTO(0x00000000000fffff, 20)
+ CLZ(0x0000000000080000, 44)
+ CLO(0xfffff00000000000, 20)
+ CTZ(0x0000000000080000, 19)
+ CTO(0x000000000007ffff, 19)
+ CLZ(0x0000000000040000, 45)
+ CLO(0xffffe00000000000, 19)
+ CTZ(0x0000000000040000, 18)
+ CTO(0x000000000003ffff, 18)
+ CLZ(0x0000000000020000, 46)
+ CLO(0xffffc00000000000, 18)
+ CTZ(0x0000000000020000, 17)
+ CTO(0x000000000001ffff, 17)
+ CLZ(0x0000000000010000, 47)
+ CLO(0xffff800000000000, 17)
+ CTZ(0x0000000000010000, 16)
+ CTO(0x000000000000ffff, 16)
+ CLZ(0x0000000000008000, 48)
+ CLO(0xffff000000000000, 16)
+ CTZ(0x0000000000008000, 15)
+ CTO(0x0000000000007fff, 15)
+ CLZ(0x0000000000004000, 49)
+ CLO(0xfffe000000000000, 15)
+ CTZ(0x0000000000004000, 14)
+ CTO(0x0000000000003fff, 14)
+ CLZ(0x0000000000002000, 50)
+ CLO(0xfffc000000000000, 14)
+ CTZ(0x0000000000002000, 13)
+ CTO(0x0000000000001fff, 13)
+ CLZ(0x0000000000001000, 51)
+ CLO(0xfff8000000000000, 13)
+ CTZ(0x0000000000001000, 12)
+ CTO(0x0000000000000fff, 12)
+ CLZ(0x0000000000000800, 52)
+ CLO(0xfff0000000000000, 12)
+ CTZ(0x0000000000000800, 11)
+ CTO(0x00000000000007ff, 11)
+ CLZ(0x0000000000000400, 53)
+ CLO(0xffe0000000000000, 11)
+ CTZ(0x0000000000000400, 10)
+ CTO(0x00000000000003ff, 10)
+ CLZ(0x0000000000000200, 54)
+ CLO(0xffc0000000000000, 10)
+ CTZ(0x0000000000000200, 9)
+ CTO(0x00000000000001ff, 9)
+ CLZ(0x0000000000000100, 55)
+ CLO(0xff80000000000000, 9)
+ CTZ(0x0000000000000100, 8)
+ CTO(0x00000000000000ff, 8)
+ CLZ(0x0000000000000080, 56)
+ CLO(0xff00000000000000, 8)
+ CTZ(0x0000000000000080, 7)
+ CTO(0x000000000000007f, 7)
+ CLZ(0x0000000000000040, 57)
+ CLO(0xfe00000000000000, 7)
+ CTZ(0x0000000000000040, 6)
+ CTO(0x000000000000003f, 6)
+ CLZ(0x0000000000000020, 58)
+ CLO(0xfc00000000000000, 6)
+ CTZ(0x0000000000000020, 5)
+ CTO(0x000000000000001f, 5)
+ CLZ(0x0000000000000010, 59)
+ CLO(0xf800000000000000, 5)
+ CTZ(0x0000000000000010, 4)
+ CTO(0x000000000000000f, 4)
+ CLZ(0x0000000000000008, 60)
+ CLO(0xf000000000000000, 4)
+ CTZ(0x0000000000000008, 3)
+ CTO(0x0000000000000007, 3)
+ CLZ(0x0000000000000004, 61)
+ CLO(0xe000000000000000, 3)
+ CTZ(0x0000000000000004, 2)
+ CTO(0x0000000000000003, 2)
+ CLZ(0x0000000000000002, 62)
+ CLO(0xc000000000000000, 2)
+ CTZ(0x0000000000000002, 1)
+ CTO(0x0000000000000001, 1)
+ CLZ(0x0000000000000001, 63)
+ CLO(0x8000000000000000, 1)
+ CTZ(0x0000000000000001, 0)
+ CTO(0x0000000000000000, 0)
+ CLZ(0x0000000000000000, 64)
+ CLO(0x0000000000000000, 0)
+ CTZ(0x0000000000000000, 64)
+ CTO(0xffffffffffffffff, 64)
+#endif
+ prepare
+ pushargi ok
+ finishi @printf
+ reti 0
+ epilog
+#endif
-#define def_wi(i) \
+#define def_wi(i, ii) \
name _w##i \
_w##i: \
prolog \
- arg $arg##i \
+ arg##ii $arg##i \
getarg##i %r0 $arg##i \
- retr %r0 \
+ retr##i %r0 \
epilog
#define def_wf(f) \
name _w##f \
truncr##f %r0 %f0 \
retr %r0 \
epilog
-#define def_fi(f, i) \
+#define def_fi(f, i, ii) \
name f##i \
f##i: \
prolog \
- arg $arg##i \
+ arg##ii $arg##i \
getarg##i %r0 $arg##i \
extr##f %f0 %r0 \
retr##f %f0 \
.code
jmpi main
- def_wi(_c)
- def_wi(_uc)
- def_wi(_s)
- def_wi(_us)
+ def_wi(_c, _c)
+ def_wi(_uc, _c)
+ def_wi(_s, _s)
+ def_wi(_us, _s)
#if __WORDSIZE == 64
- def_wi(_i)
- def_wi(_ui)
+ def_wi(_i, _i)
+ def_wi(_ui, _i)
#endif
def_wf(_f)
def_wf(_d)
- def_fi(_f, _c)
- def_fi(_f, _uc)
- def_fi(_f, _s)
- def_fi(_f, _us)
- def_fi(_f, _i)
+ def_fi(_f, _c, _c)
+ def_fi(_f, _uc, _c)
+ def_fi(_f, _s, _s)
+ def_fi(_f, _us, _s)
+ def_fi(_f, _i, _i)
#if __WORDSIZE == 64
- def_fi(_f, _ui)
- def_fi(_f, _l)
+ def_fi(_f, _ui, _i)
+ def_fi(_f, _l, _l)
#endif
- def_fi(_d, _c)
- def_fi(_d, _uc)
- def_fi(_d, _s)
- def_fi(_d, _us)
- def_fi(_d, _i)
+ def_fi(_d, _c, _c)
+ def_fi(_d, _uc, _c)
+ def_fi(_d, _s, _s)
+ def_fi(_d, _us, _s)
+ def_fi(_d, _i, _i)
#if __WORDSIZE == 64
- def_fi(_d, _ui)
- def_fi(_d, _l)
+ def_fi(_d, _ui, _i)
+ def_fi(_d, _l, _l)
#endif
def_f(_f)
def_f(_d)
#define _call_w(n, i, a, r) \
prepare \
- pushargi a \
+ pushargi##i a \
finishi _w##i \
retval %r0 \
extr##i %r0 %r0 \
#define call_wf(n, f, a, r) _call_wf(n, f, a, r)
#define _call_fi(n, f, i, a, r) \
prepare \
- pushargi a \
+ pushargi##i a \
finishi f##i \
retval##f %f0 \
beqi##f f##i##n %f0 r \
call_wf(__LINE__, _d, c7f, f7f)
call_wf(__LINE__, _d, wc80, f80)
call_wf(__LINE__, _d, wc81, f81)
+
call_fi(__LINE__, _f, _c, c7f, f7f)
call_fi(__LINE__, _f, _c, c80, f80)
call_fi(__LINE__, _f, _uc, c7f, f7f)
main(int argc, char *argv[])
{
void (*code)(void);
- jit_node_t *jmp, *pass;
- jit_node_t *jw, *jf, *jd;
+ jit_node_t *jmp, *pass, *fail;
+ jit_node_t *jw, *jf, *jd;
jit_int32_t s1, s2, s3, s4, s5, s6, s7, s8,
s9, s10, s11, s12, s13, s14, s15, s16;
jit_node_t *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8,
LOAD_ARG(16);
#undef LOAD_ARG
pass = jit_forward();
+ fail = jit_forward();
#define CHECK_ARG(N) \
do { \
jit_getarg(JIT_R0, a##N); \
- jit_patch_at(jit_beqi(JIT_R0, 17 - N), pass); \
+ jit_patch_at(jit_bnei(JIT_R0, 17 - N), fail); \
} while (0)
CHECK_ARG(1);
CHECK_ARG(2);
CHECK_ARG(15);
CHECK_ARG(16);
#undef CHECK_ARG
+ jit_patch_at(jit_jmpi(), pass);
+ jit_link(fail);
jit_calli(abort);
jit_link(pass);
jit_ret();
LOAD_ARG(16);
#undef LOAD_ARG
pass = jit_forward();
+ fail = jit_forward();
#define CHECK_ARG(N) \
do { \
jit_getarg_f(JIT_F0, a##N); \
- jit_patch_at(jit_beqi_f(JIT_F0, 17 - N), pass); \
+ jit_patch_at(jit_bnei_f(JIT_F0, 17 - N), fail); \
} while (0)
CHECK_ARG(1);
CHECK_ARG(2);
CHECK_ARG(15);
CHECK_ARG(16);
#undef CHECK_ARG
+ jit_patch_at(jit_jmpi(), pass);
+ jit_link(fail);
jit_calli(abort);
jit_link(pass);
jit_ret();
LOAD_ARG(16);
#undef LOAD_ARG
pass = jit_forward();
+ fail = jit_forward();
#define CHECK_ARG(N) \
do { \
jit_getarg_d(JIT_F0, a##N); \
- jit_patch_at(jit_beqi_d(JIT_F0, 17 - N), pass); \
+ jit_patch_at(jit_bnei_d(JIT_F0, 17 - N), fail); \
} while (0)
CHECK_ARG(1);
CHECK_ARG(2);
CHECK_ARG(15);
CHECK_ARG(16);
#undef CHECK_ARG
+ jit_patch_at(jit_jmpi(), pass);
+ jit_link(fail);
jit_calli(abort);
jit_link(pass);
jit_ret();
jit_pushargi(1);
}
jit_patch_at(jit_finishi(NULL), jw);
+
jit_prepare();
{
jit_pushargi_f(16);
jit_pushargi_f(1);
}
jit_patch_at(jit_finishi(NULL), jf);
+
jit_prepare();
{
jit_pushargi_d(16);
#define join(tid) \
/* load pthread_t value in JIT_R0 */ \
jit_movi(JIT_R0, (jit_word_t)tids); \
- jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \
+ if (__WORDSIZE == 64 && sizeof(pthread_t) == 4) \
+ jit_ldxi_i(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \
+ else \
+ jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \
jit_prepare(); \
jit_pushargr(JIT_R0); \
jit_pushargi((jit_word_t)NULL); \
# define _l15 _w15
#endif
+#ifndef jit_arg_uc
+# define jit_arg_uc jit_arg_c
+#endif
+#ifndef jit_arg_us
+# define jit_arg_us jit_arg_s
+#endif
+#ifndef jit_arg_ui
+# define jit_arg_ui jit_arg_i
+#endif
+
/*
* Types
*/
#define arg15(T) arg14(T) a15 = jit_arg##T();
#define get0(B,T,R) jit_movi##B(R##0,0);
-#define get1(B,T,R) jit_getarg##B(R##0,a##1);
+#define get1(B,T,R) jit_getarg##T(R##0,a##1);
#define get2(B,T,R) \
get1(B,T,R); \
jit_movr##B(R##1, R##0); \
n##T##N = jit_name(strfy(n##T##N)); \
jit_note("ccall.c", __LINE__); \
jit_prolog(); \
- arg##N(); \
+ arg##N(T); \
get##N(,T,JIT_R) \
jit_extr##T(JIT_R0, JIT_R0); \
jit_retr(JIT_R0); \
#define calin(T,N) \
jit_prepare(); \
- push##N() \
+ push##N(T) \
jit_finishi(C##T##N); \
jit_retval##T(JIT_R0); \
jmp = jit_beqi(JIT_R0, T##N); \
#undef calfn
#define calin(T,N) \
jit_prepare(); \
- push##N() \
+ push##N(T) \
jit_finishi(CJ##T##N); \
jit_retval##T(JIT_R0); \
jmp = jit_beqi(JIT_R0, T##N); \
--- /dev/null
+.data 32
+str:
+.c "%.0lf\n"
+.code
+ jmpi main
+/*
+ * double factorial(unsigned long n) {
+ * double r = 1;
+ * while (n > 1) {
+ * r *= n;
+ * --n;
+ * }
+ * return r;
+ * }
+ *
+ * Register use in the code below:
+ * %f0 - r, the running product, starts at 1.0
+ * %f1 - n converted to double by extr_d
+ * %f2 - the constant 1.0 (copied from %f0 before the loop)
+ *
+ * Note: the loop below leaves only when %f1 < 1.0, so unlike the
+ * sketch above it also runs once for n == 1; that extra pass
+ * multiplies by 1.0 and does not change the result.
+ */
+factorial:
+ prolog
+ arg $n
+ getarg %r0 $n
+ movi_d %f0 1.0 /* r = 1 */
+ extr_d %f1 %r0 /* n as a double */
+ movr_d %f2 %f0 /* keep 1.0 for the compare and the decrement */
+loop:
+ bltr_d done %f1 %f2 /* leave once n < 1.0 */
+ mulr_d %f0 %f0 %f1 /* r *= n */
+ subr_d %f1 %f1 %f2 /* --n */
+ jmpi loop
+done:
+ retr_d %f0
+ epilog
+
+/*
+ * int main(int argc, char *argv[]) {
+ * unsigned long v;
+ * double d;
+ * if (argc == 2)
+ * v = strtoul(argv[1], NULL, 0);
+ * else
+ * v = 32;
+ * d = factorial(v);
+ * printf("%.0lf\n", d);
+ * return 0;
+ * }
+ *
+ * %v0 holds v across the call to factorial; %f0 receives d.
+ */
+main:
+ prolog
+ arg $argc
+ arg $argv
+ getarg %r0 $argc
+ bnei default %r0 2 /* argc != 2: use the built-in value */
+ getarg %v0 $argv
+ ldxi %r0 %v0 $(__WORDSIZE >> 3) /* argv[1]: one word past argv[0] */
+ prepare
+ pushargr %r0
+ pushargi 0 /* endptr = NULL */
+ pushargi 0 /* base = 0 */
+ finishi @strtoul
+ retval %v0
+ jmpi call
+default:
+ movi %v0 32
+call:
+ prepare
+ pushargr %v0
+ finishi factorial
+ retval_d %f0
+ prepare
+ pushargi str /* the "%.0lf\n" format from the data section */
+ ellipsis
+ pushargr_d %f0
+ finishi @printf
+ reti 0
+ epilog
arg $argc
arg $argv
- getarg_i %r0 $argc
+ getarg %r0 $argc
blei default %r0 1
getarg %r0 $argv
addi %r0 %r0 $(__WORDSIZE >> 3)
# define x80 0x8000000000000000
#endif
-#if __mips__ || __sparc__ || __hppa__ || __riscv
+#if (__mips__ && __mips_isa_rev < 6) || __sparc__ || __hppa__ || __riscv
# define wnan x7f
-#elif __arm__ || __aarch64__ || __alpha__ || __loongarch__
+#elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || __aarch64__ || __alpha__ || __loongarch__
# define wnan 0
#else
# define wnan x80
static jit_word_t get_imm(void);
static void live(void);
static void align(void); static void name(void);
+static void skip(void);
static void prolog(void);
static void frame(void); static void tramp(void);
static void ellipsis(void);
static void allocai(void); static void allocar(void);
+static void arg_c(void); static void arg_s(void);
+static void arg_i(void);
+#if __WORDSIZE == 64
+static void arg_l(void);
+#endif
static void arg(void);
static void getarg_c(void); static void getarg_uc(void);
static void getarg_s(void); static void getarg_us(void);
static void getarg_ui(void); static void getarg_l(void);
#endif
static void getarg(void);
+static void putargr_c(void); static void putargi_c(void);
+static void putargr_uc(void); static void putargi_uc(void);
+static void putargr_s(void); static void putargi_s(void);
+static void putargr_us(void); static void putargi_us(void);
+static void putargr_i(void); static void putargi_i(void);
+#if __WORDSIZE == 64
+static void putargr_ui(void); static void putargi_ui(void);
+static void putargr_l(void); static void putargi_l(void);
+#endif
static void putargr(void); static void putargi(void);
static void addr(void); static void addi(void);
static void addxr(void); static void addxi(void);
static void rshr(void); static void rshi(void);
static void rshr_u(void); static void rshi_u(void);
static void negr(void); static void comr(void);
+static void clor(void); static void clzr(void);
+static void ctor(void); static void ctzr(void);
static void ltr(void); static void lti(void);
static void ltr_u(void); static void lti_u(void);
static void ler(void); static void lei(void);
static void jmpr(void); static void jmpi(void);
static void callr(void); static void calli(void);
static void prepare(void);
+
+static void pushargr_c(void); static void pushargi_c(void);
+static void pushargr_uc(void); static void pushargi_uc(void);
+static void pushargr_s(void); static void pushargi_s(void);
+static void pushargr_us(void); static void pushargi_us(void);
+static void pushargr_i(void); static void pushargi_i(void);
+#if __WORDSIZE == 64
+static void pushargr_ui(void); static void pushargi_ui(void);
+static void pushargr_l(void); static void pushargi_l(void);
+#endif
static void pushargr(void); static void pushargi(void);
+
static void finishr(void); static void finishi(void);
static void ret(void);
+
+static void retr_c(void); static void reti_c(void);
+static void retr_uc(void); static void reti_uc(void);
+static void retr_s(void); static void reti_s(void);
+static void retr_us(void); static void reti_us(void);
+static void retr_i(void); static void reti_i(void);
+#if __WORDSIZE == 64
+static void retr_ui(void); static void reti_ui(void);
+static void retr_l(void); static void reti_l(void);
+#endif
static void retr(void); static void reti(void);
static void retval_c(void); static void retval_uc(void);
static void retval_s(void); static void retval_us(void);
#define entry2(name, function) { NULL, name, function }
entry(live),
entry(align), entry(name),
+ entry(skip),
entry(prolog),
entry(frame), entry(tramp),
entry(ellipsis),
entry(allocai), entry(allocar),
+ entry(arg_c), entry(arg_s),
+ entry(arg_i),
+#if __WORDSIZE == 64
+ entry(arg_l),
+#endif
entry(arg),
entry(getarg_c), entry(getarg_uc),
entry(getarg_s), entry(getarg_us),
entry(getarg_ui), entry(getarg_l),
#endif
entry(getarg),
+
+ entry(putargr_c), entry(putargi_c),
+ entry(putargr_uc), entry(putargi_uc),
+ entry(putargr_s), entry(putargi_s),
+ entry(putargr_us), entry(putargi_us),
+ entry(putargr_i), entry(putargi_i),
+#if __WORDSIZE == 64
+ entry(putargr_ui), entry(putargi_ui),
+ entry(putargr_l), entry(putargi_l),
+#endif
entry(putargr), entry(putargi),
entry(addr), entry(addi),
entry(addxr), entry(addxi),
entry(rshr), entry(rshi),
entry(rshr_u), entry(rshi_u),
entry(negr), entry(comr),
+ entry(clor), entry(clzr),
+ entry(ctor), entry(ctzr),
entry(ltr), entry(lti),
entry(ltr_u), entry(lti_u),
entry(ler), entry(lei),
entry(jmpr), entry(jmpi),
entry(callr), entry(calli),
entry(prepare),
+ entry(pushargr_c), entry(pushargi_c),
+ entry(pushargr_uc), entry(pushargi_uc),
+ entry(pushargr_s), entry(pushargi_s),
+ entry(pushargr_us), entry(pushargi_us),
+ entry(pushargr_i), entry(pushargi_i),
+#if __WORDSIZE == 64
+ entry(pushargr_ui), entry(pushargi_ui),
+ entry(pushargr_l), entry(pushargi_l),
+#endif
entry(pushargr), entry(pushargi),
entry(finishr), entry(finishi),
entry(ret),
+ entry(retr_c), entry(reti_c),
+ entry(retr_uc), entry(reti_uc),
+ entry(retr_s), entry(reti_s),
+ entry(retr_us), entry(reti_us),
+ entry(retr_i), entry(reti_i),
+#if __WORDSIZE == 64
+ entry(retr_ui), entry(reti_ui),
+ entry(retr_l), entry(reti_l),
+#endif
entry(retr), entry(reti),
entry(retval_c), entry(retval_uc),
entry(retval_s), entry(retval_us),
jit_live(parser.regval);
}
entry_im(align)
+entry_im(skip)
entry(prolog)
entry_im(frame) entry_im(tramp)
entry(ellipsis)
symbol->value.i = i;
}
entry_ir_ir(allocar)
+entry_ca(arg_c) entry_ca(arg_s)
+entry_ca(arg_i)
+#if __WORDSIZE == 64
+entry_ca(arg_l)
+#endif
entry_ca(arg)
entry_ia(getarg_c) entry_ia(getarg_uc)
entry_ia(getarg_s) entry_ia(getarg_us)
entry_ia(getarg_ui) entry_ia(getarg_l)
#endif
entry_ia(getarg)
+entry_ia(putargr_c) entry_ima(putargi_c)
+entry_ia(putargr_uc) entry_ima(putargi_uc)
+entry_ia(putargr_s) entry_ima(putargi_s)
+entry_ia(putargr_us) entry_ima(putargi_us)
+entry_ia(putargr_i) entry_ima(putargi_i)
+#if __WORDSIZE == 64
+entry_ia(putargr_ui) entry_ima(putargi_ui)
+entry_ia(putargr_l) entry_ima(putargi_l)
+#endif
entry_ia(putargr) entry_ima(putargi)
entry_ir_ir_ir(addr) entry_ir_ir_im(addi)
entry_ir_ir_ir(addxr) entry_ir_ir_im(addxi)
entry_ir_ir_ir(rshr) entry_ir_ir_im(rshi)
entry_ir_ir_ir(rshr_u) entry_ir_ir_im(rshi_u)
entry_ir_ir(negr) entry_ir_ir(comr)
+entry_ir_ir(clor) entry_ir_ir(clzr)
+entry_ir_ir(ctor) entry_ir_ir(ctzr)
entry_ir_ir_ir(ltr) entry_ir_ir_im(lti)
entry_ir_ir_ir(ltr_u) entry_ir_ir_im(lti_u)
entry_ir_ir_ir(ler) entry_ir_ir_im(lei)
entry_ir(jmpr) entry_lb(jmpi)
entry_ir(callr) entry_fn(calli)
entry(prepare)
+entry_ir(pushargr_c) entry_im(pushargi_c)
+entry_ir(pushargr_uc) entry_im(pushargi_uc)
+entry_ir(pushargr_s) entry_im(pushargi_s)
+entry_ir(pushargr_us) entry_im(pushargi_us)
+entry_ir(pushargr_i) entry_im(pushargi_i)
+#if __WORDSIZE == 64
+entry_ir(pushargr_ui) entry_im(pushargi_ui)
+entry_ir(pushargr_l) entry_im(pushargi_l)
+#endif
entry_ir(pushargr) entry_im(pushargi)
entry_ir(finishr) entry_fn(finishi)
entry(ret)
+entry_ir(retr_c) entry_im(reti_c)
+entry_ir(retr_uc) entry_im(reti_uc)
+entry_ir(retr_s) entry_im(reti_s)
+entry_ir(retr_us) entry_im(reti_us)
+entry_ir(retr_i) entry_im(reti_i)
+#if __WORDSIZE == 64
+entry_ir(retr_ui) entry_im(reti_ui)
+entry_ir(retr_l) entry_im(reti_l)
+#endif
entry_ir(retr) entry_im(reti)
entry_ir(retval_c) entry_ir(retval_uc)
entry_ir(retval_s) entry_ir(retval_us)
opt_short += snprintf(cmdline + opt_short,
sizeof(cmdline) - opt_short,
" -D__mips__=1");
+ opt_short += snprintf(cmdline + opt_short,
+ sizeof(cmdline) - opt_short,
+ " -D__mips_isa_rev=%d", jit_cpu.release);
#endif
#if defined(__arm__)
opt_short += snprintf(cmdline + opt_short,
--- /dev/null
+/*
+ * Simple test of (un)protecting a code buffer.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+
+#define MARKER 10
+
+/*
+ * Emits a function that reads one byte reserved inside its own code
+ * buffer and prints "ok" when that byte equals MARKER, or calls exit(1)
+ * otherwise. The byte is only stored after jit_emit(), between
+ * jit_unprotect() and jit_protect(), which is what this test exercises.
+ * Returns 0; aborts if jit_emit() returns NULL.
+ */
+int
+main(int argc, char *argv[])
+{
+ jit_state_t *_jit;
+ jit_node_t *load, *label, *ok;
+ unsigned char *ptr;
+ void (*function)(void);
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ jit_prolog();
+
+ load = jit_movi(JIT_R0, 0); /* placeholder; patched to the address of label below */
+ jit_ldr_c(JIT_R0, JIT_R0); /* read the reserved byte */
+ ok = jit_forward();
+ jit_patch_at(jit_beqi(JIT_R0, MARKER), ok);
+ /* Byte did not match MARKER: exit(1). */
+ jit_prepare();
+ jit_pushargi(1);
+ jit_finishi(exit);
+ label = jit_indirect();
+ jit_skip(1); /* Reserves enough space for a byte. */
+ jit_patch_at(load, label);
+ jit_link(ok); /* the branch taken on a match lands past the reserved byte */
+ jit_prepare();
+ jit_pushargi((jit_word_t)"%s\n");
+ jit_ellipsis();
+ jit_pushargi((jit_word_t)"ok");
+ jit_finishi(printf);
+
+ function = jit_emit();
+ if (function == NULL)
+ abort();
+
+ /* Store the marker into the emitted code while it is unprotected. */
+ jit_unprotect ();
+ ptr = jit_address (label);
+ *ptr = MARKER;
+ jit_protect ();
+
+ jit_clear_state();
+
+ (*function)();
+
+ jit_destroy_state();
+ finish_jit();
+
+ return (0);
+}
putr:
prolog
frame 160
- arg $ac
- arg $auc
- arg $as
- arg $aus
- arg $ai
+ arg_c $ac
+ arg_c $auc
+ arg_s $as
+ arg_s $aus
+ arg_i $ai
#if __WORDSIZE == 64
- arg $aui
- arg $al
+ arg_i $aui
+ arg_l $al
#endif
arg_f $af
arg_d $ad
arg $a
#if __WORDSIZE == 64
- arg $_l
- arg $_ui
+ arg_l $_l
+ arg_i $_ui
#endif
- arg $_i
- arg $_us
- arg $_s
- arg $_uc
- arg $_c
+ arg_i $_i
+ arg_s $_us
+ arg_s $_s
+ arg_c $_uc
+ arg_c $_c
getarg_c %r0 $ac
negr %r0 %r0
- putargr %r0 $ac
+ putargr_c %r0 $ac
getarg_uc %r0 $auc
negr %r0 %r0
- putargr %r0 $auc
+ putargr_uc %r0 $auc
getarg_s %r0 $as
negr %r0 %r0
- putargr %r0 $as
+ putargr_s %r0 $as
getarg_us %r0 $aus
negr %r0 %r0
- putargr %r0 $aus
+ putargr_us %r0 $aus
getarg_i %r0 $ai
negr %r0 %r0
- putargr %r0 $ai
+ putargr_i %r0 $ai
#if __WORDSIZE == 64
getarg_ui %r0 $aui
negr %r0 %r0
- putargr %r0 $aui
+ putargr_ui %r0 $aui
getarg_l %r0 $al
negr %r0 %r0
- putargr %r0 $al
+ putargr_l %r0 $al
#endif
getarg_f %f0 $af
negr_f %f0 %f0
#if __WORDSIZE == 64
getarg_l %r0 $_l
negr %r0 %r0
- putargr %r0 $_l
+ putargr_l %r0 $_l
getarg_ui %r0 $_ui
negr %r0 %r0
- putargr %r0 $_ui
+ putargr_ui %r0 $_ui
#endif
getarg_i %r0 $_i
negr %r0 %r0
- putargr %r0 $_i
+ putargr_i %r0 $_i
getarg_us %r0 $_us
negr %r0 %r0
- putargr %r0 $_us
+ putargr_us %r0 $_us
getarg_s %r0 $_s
negr %r0 %r0
- putargr %r0 $_s
+ putargr_s %r0 $_s
getarg_uc %r0 $_uc
negr %r0 %r0
- putargr %r0 $_uc
+ putargr_uc %r0 $_uc
getarg_c %r0 $_c
negr %r0 %r0
- putargr %r0 $_c
+ putargr_c %r0 $_c
jmpi _putr
rputr:
- putargi 17 $ac
- putargi 16 $auc
- putargi 15 $as
- putargi 14 $aus
- putargi 13 $ai
+ putargi_c 17 $ac
+ putargi_uc 16 $auc
+ putargi_s 15 $as
+ putargi_us 14 $aus
+ putargi_i 13 $ai
#if __WORDSIZE == 64
- putargi 12 $aui
- putargi 11 $al
+ putargi_ui 12 $aui
+ putargi_l 11 $al
#endif
putargi_f 10 $af
putargi_d 9 $ad
putargi 8 $a
#if __WORDSIZE == 64
- putargi 7 $_l
- putargi 6 $_ui
+ putargi_l 7 $_l
+ putargi_ui 6 $_ui
#endif
- putargi 5 $_i
- putargi 4 $_us
- putargi 3 $_s
- putargi 2 $_uc
- putargi 1 $_c
+ putargi_i 5 $_i
+ putargi_us 4 $_us
+ putargi_s 3 $_s
+ putargi_uc 2 $_uc
+ putargi_c 1 $_c
jmpi _puti
rputi:
ret
_putr:
prolog
tramp 160
- arg $ac
- arg $auc
- arg $as
- arg $aus
- arg $ai
+ arg_c $ac
+ arg_c $auc
+ arg_s $as
+ arg_s $aus
+ arg_i $ai
#if __WORDSIZE == 64
- arg $aui
- arg $al
+ arg_i $aui
+ arg_l $al
#endif
arg_f $af
arg_d $ad
arg $a
#if __WORDSIZE == 64
- arg $_l
- arg $_ui
+ arg_l $_l
+ arg_i $_ui
#endif
- arg $_i
- arg $_us
- arg $_s
- arg $_uc
- arg $_c
+ arg_i $_i
+ arg_s $_us
+ arg_s $_s
+ arg_c $_uc
+ arg_c $_c
getarg_c %r0 $ac
beqi rac %r0 -1
calli @abort
calli @abort
ra:
#if __WORDSIZE == 64
- getarg %r0 $_l
+ getarg_l %r0 $_l
beqi r_l %r0 -11
calli @abort
r_l:
_puti:
prolog
tramp 160
- arg $ac
- arg $auc
- arg $as
- arg $aus
- arg $ai
+ arg_c $ac
+ arg_c $auc
+ arg_s $as
+ arg_s $aus
+ arg_i $ai
#if __WORDSIZE == 64
- arg $aui
- arg $al
+ arg_i $aui
+ arg_l $al
#endif
arg_f $af
arg_d $ad
arg $a
#if __WORDSIZE == 64
- arg $_l
- arg $_ui
+ arg_l $_l
+ arg_i $_ui
#endif
- arg $_i
- arg $_us
- arg $_s
- arg $_uc
- arg $_c
+ arg_i $_i
+ arg_s $_us
+ arg_s $_s
+ arg_c $_uc
+ arg_c $_c
getarg_c %r0 $ac
beqi iac %r0 17
calli @abort
calli @abort
ia:
#if __WORDSIZE == 64
- getarg %r0 $_l
+ getarg_l %r0 $_l
beqi i_l %r0 7
calli @abort
i_l:
main:
prolog
prepare
- pushargi 1
- pushargi 2
- pushargi 3
- pushargi 4
- pushargi 5
+ pushargi_c 1
+ pushargi_uc 2
+ pushargi_s 3
+ pushargi_us 4
+ pushargi_i 5
#if __WORDSIZE == 64
- pushargi 6
- pushargi 7
+ pushargi_ui 6
+ pushargi_l 7
#endif
- pushargi_f 8
- pushargi_d 9
- pushargi 10
+ pushargi_f 8
+ pushargi_d 9
+ pushargi 10
#if __WORDSIZE == 64
- pushargi 11
- pushargi 12
+ pushargi_l 11
+ pushargi_ui 12
#endif
- pushargi 13
- pushargi 14
- pushargi 15
- pushargi 16
- pushargi 17
+ pushargi_i 13
+ pushargi_us 14
+ pushargi_s 15
+ pushargi_uc 16
+ pushargi_c 17
finishi putr
prepare
pushargi 1
--- /dev/null
+/*
+ * Simple test for x86_64 rip relative access that can also be useful
+ * on other ports when data is close to instruction pointer.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/mman.h>
+#if defined(__sgi)
+# include <fcntl.h>
+#endif
+#include <unistd.h>
+
+#ifndef MAP_ANON
+# define MAP_ANON MAP_ANONYMOUS
+# ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS 0
+# endif
+#endif
+
+#if !defined(__sgi)
+#define mmap_fd -1
+#endif
+
+/*
+ * Maps pagesize * mult bytes, uses the first page as data and hands the
+ * rest to lightning as the code buffer, so the data the generated code
+ * touches sits right next to the code. The generated function stores and
+ * reloads values of each integer width plus a float and a double through
+ * absolute addresses in the data page and prints them with printf.
+ * Returns 0; asserts on mmap/mprotect failure and aborts if jit_emit()
+ * returns NULL.
+ */
+int
+main(int argc, char *argv[])
+{
+ jit_uint8_t *ptr;
+ jit_state_t *_jit;
+#if defined(__sgi)
+ int mmap_fd;
+#endif
+ void (*function)(void);
+ int mmap_prot, mmap_flags, result, pagesize;
+ int mult;
+
+ /* Number of pages to map: one for data, mult - 1 for code. */
+#if defined(__ia64__)
+ mult = 8;
+#else
+ mult = 2;
+#endif
+ pagesize = sysconf(_SC_PAGESIZE);
+ if (pagesize < 4096) /* also covers a failing sysconf() returning -1 */
+ pagesize = 4096;
+
+#if defined(__sgi)
+ mmap_fd = open("/dev/zero", O_RDWR);
+#endif
+
+ mmap_prot = PROT_READ | PROT_WRITE;
+#if !(__OpenBSD__ || __APPLE__)
+ mmap_prot |= PROT_EXEC;
+#endif
+#if __NetBSD__
+ mmap_prot = PROT_MPROTECT(mmap_prot);
+ mmap_flags = 0;
+#else
+ mmap_flags = MAP_PRIVATE;
+#endif
+ mmap_flags |= MAP_ANON;
+ ptr = mmap(NULL, pagesize * mult, mmap_prot, mmap_flags, mmap_fd, 0);
+ assert(ptr != MAP_FAILED);
+#if defined(__sgi)
+ close(mmap_fd);
+#endif
+
+ init_jit(argv[0]);
+ _jit = jit_new_state();
+
+ jit_prolog();
+ /* Stores into the data page: chars at 0 and 1, shorts at 2 and 4,
+ * ints at 8 and 12, long at 16, float at 24, double at 32. */
+ jit_movi(JIT_R0, 'c');
+ jit_sti_c(ptr + 0, JIT_R0);
+ jit_movi(JIT_R0, 'C');
+ jit_sti_c(ptr + 1, JIT_R0);
+
+ jit_movi(JIT_R0, 's');
+ jit_sti_s(ptr + 2, JIT_R0);
+ jit_movi(JIT_R0, 'S');
+ jit_sti_s(ptr + 4, JIT_R0);
+
+ jit_movi(JIT_R0, 'i');
+ jit_sti_i(ptr + 8, JIT_R0);
+#if __WORDSIZE == 64
+ jit_movi(JIT_R0, 'I');
+ jit_sti_i(ptr + 12, JIT_R0);
+
+ jit_movi(JIT_R0, 'l');
+ jit_sti_l(ptr + 16, JIT_R0);
+#endif
+ jit_movi_f(JIT_F0, 1.0);
+ jit_sti_f(ptr + 24, JIT_F0);
+ jit_movi_d(JIT_F0, 2.0);
+ jit_sti_d(ptr + 32, JIT_F0);
+
+ /* Signed loads of 'c', 's', 'i' (and 'l' on 64 bit), then print them. */
+ jit_ldi_c(JIT_R0, ptr + 0);
+ jit_ldi_s(JIT_R1, ptr + 2);
+ jit_ldi_i(JIT_R2, ptr + 8);
+#if __WORDSIZE == 64
+ jit_ldi_l(JIT_V0, ptr + 16);
+#endif
+ jit_prepare();
+#if __WORDSIZE == 64
+ jit_pushargi((jit_word_t)"%c %c %c %c\n");
+#else
+ /* No long store/load on 32 bit: the 'l' is printed literally. */
+ jit_pushargi((jit_word_t)"%c %c %c l\n");
+#endif
+ jit_ellipsis();
+ jit_pushargr(JIT_R0);
+ jit_pushargr(JIT_R1);
+ jit_pushargr(JIT_R2);
+#if __WORDSIZE == 64
+ jit_pushargr(JIT_V0);
+#endif
+ jit_finishi(printf);
+
+ /* Unsigned loads of 'C', 'S' (and 'I' on 64 bit), then print them. */
+ jit_ldi_uc(JIT_R0, ptr + 1);
+ jit_ldi_us(JIT_R1, ptr + 4);
+#if __WORDSIZE == 64
+ jit_ldi_ui(JIT_R2, ptr + 12);
+#endif
+ jit_prepare();
+#if __WORDSIZE == 64
+ jit_pushargi((jit_word_t)"%c %c %c\n");
+#else
+ /* No unsigned int load on 32 bit: the 'I' is printed literally. */
+ jit_pushargi((jit_word_t)"%c %c I\n");
+#endif
+ jit_ellipsis();
+ jit_pushargr(JIT_R0);
+ jit_pushargr(JIT_R1);
+#if __WORDSIZE == 64
+ jit_pushargr(JIT_R2);
+#endif
+ jit_finishi(printf);
+
+ /* Float is widened to double before being passed to printf. */
+ jit_ldi_f(JIT_F0, ptr + 24);
+ jit_extr_f_d(JIT_F0, JIT_F0);
+ jit_ldi_d(JIT_F1, ptr + 32);
+
+ jit_prepare();
+ jit_pushargi((jit_word_t)"%.1f %.1f\n");
+ jit_ellipsis();
+ jit_pushargr_d(JIT_F0);
+ jit_pushargr_d(JIT_F1);
+ jit_finishi(printf);
+
+ jit_realize();
+
+ /* Code goes after the data page, in the remaining mult - 1 pages. */
+ jit_set_code(ptr + pagesize, pagesize * (mult - 1));
+
+ #if __NetBSD__ || __OpenBSD__ || __APPLE__
+ result = mprotect(ptr, pagesize, PROT_READ | PROT_WRITE);
+ assert(result == 0);
+#endif
+ function = jit_emit();
+ if (function == NULL)
+ abort();
+
+ //jit_disassemble();
+ jit_clear_state();
+#if __NetBSD__ || __OpenBSD__ || __APPLE__
+ /* NOTE(review): only the first page of the code region is switched to
+ * R|X here, while jit_set_code() was given mult - 1 pages; this relies
+ * on the generated code fitting in one page - confirm. */
+ result = mprotect(ptr + pagesize, pagesize, PROT_READ | PROT_EXEC);
+ assert(result == 0);
+#endif
+ (*function)();
+ jit_destroy_state();
+ finish_jit();
+
+ munmap(ptr, pagesize * mult);
+
+ return (0);
+}
--- /dev/null
+c s i l
+C S I
+1.0 2.0
int mmap_fd;
#endif
void (*function)(void);
- int mmap_prot, mmap_flags;
+ int mmap_prot, mmap_flags, result;
#if defined(__sgi)
mmap_fd = open("/dev/zero", O_RDWR);
#endif
mmap_prot = PROT_READ | PROT_WRITE;
-#if !__OpenBSD__
+#if !(__OpenBSD__ || __APPLE__)
mmap_prot |= PROT_EXEC;
#endif
#if __NetBSD__
abort();
#if __NetBSD__
- assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE) == 0);
+ result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE);
+ assert(result == 0);
#endif
/* and calling again with enough space works */
jit_set_code(ptr, 1024 * 1024);
abort();
jit_clear_state();
-#if __NetBSD__ || __OpenBSD__
- assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC) == 0);
+#if __NetBSD__ || __OpenBSD__ || __APPLE__
+ result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC);
+ assert(result == 0);
#endif
(*function)();
jit_destroy_state();
--- /dev/null
+.data 32
+fmt:
+.c "%d\n"
+.code
+ prolog
+ skip 4 /* the instruction under test; it sits on the fall-through path, so presumably the skipped bytes must be harmless to execute - confirm */
+ prepare
+ pushargi fmt
+ ellipsis
+ pushargi 10 /* the value printed through fmt */
+ finishi @printf
+ ret
+ epilog
#define fill_us fill_s
#define fill_ui fill_i
-#define ARG( T, N) arg $arg##T##N
+#define ARG( T, N) arg##T $arg##T##N
#define ARGF( T, N) arg##T $arg##T##N
#define ARG1( K, T) ARG##K(T, 0)
#define ARG2( K, T) ARG1( K, T) ARG##K(T, 1)
#define ARG15(K, T) ARG14(K, T) ARG##K(T, 14)
#define ARG16(K, T) ARG15(K, T) ARG##K(T, 15)
#define ARG_c(N) ARG##N( , _c)
-#define ARG_uc(N) ARG##N( , _uc)
+#define ARG_uc(N) ARG##N( , _c)
#define ARG_s(N) ARG##N( , _s)
-#define ARG_us(N) ARG##N( , _us)
+#define ARG_us(N) ARG##N( , _s)
#define ARG_i(N) ARG##N( , _i)
-#define ARG_ui(N) ARG##N( , _ui)
+#define ARG_ui(N) ARG##N( , _i)
#define ARG_l(N) ARG##N( , _l)
#define ARG_f(N) ARG##N(F, _f)
#define ARG_d(N) ARG##N(F, _d)
-#define CHK(N, T, V) \
- getarg %r0 $arg##T##V \
+#define CHK(N, T, TT, V) \
+ getarg##T %r0 $arg##TT##V \
ldxi##T %r1 %v0 $(V * szof##T) \
beqr N##T##V %r0 %r1 \
calli @abort \
N##T##V:
-#define CHKF(N, T, V) \
- getarg##T %f0 $arg##T##V \
+#define CHKF(N, T, TT, V) \
+ getarg##T %f0 $arg##TT##V \
ldxi##T %f1 %v0 $(V * szof##T) \
beqr##T N##T##V %f0 %f1 \
calli @abort \
N##T##V:
-#define GET1( K, N, T, V) CHK##K(N, T, 0)
-#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1)
-#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2)
-#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3)
-#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4)
-#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5)
-#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6)
-#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7)
-#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8)
-#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9)
-#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10)
-#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11)
-#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12)
-#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13)
-#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14)
-#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15)
+#define GET1( K, N, T, TT, V) CHK##K(N, T, TT, 0)
+#define GET2( K, N, T, TT, V) GET1( K, N, T, TT, V) CHK##K(N, T, TT, 1)
+#define GET3( K, N, T, TT, V) GET2( K, N, T, TT, V) CHK##K(N, T, TT, 2)
+#define GET4( K, N, T, TT, V) GET3( K, N, T, TT, V) CHK##K(N, T, TT, 3)
+#define GET5( K, N, T, TT, V) GET4( K, N, T, TT, V) CHK##K(N, T, TT, 4)
+#define GET6( K, N, T, TT, V) GET5( K, N, T, TT, V) CHK##K(N, T, TT, 5)
+#define GET7( K, N, T, TT, V) GET6( K, N, T, TT, V) CHK##K(N, T, TT, 6)
+#define GET8( K, N, T, TT, V) GET7( K, N, T, TT, V) CHK##K(N, T, TT, 7)
+#define GET9( K, N, T, TT, V) GET8( K, N, T, TT, V) CHK##K(N, T, TT, 8)
+#define GET10(K, N, T, TT, V) GET9( K, N, T, TT, V) CHK##K(N, T, TT, 9)
+#define GET11(K, N, T, TT, V) GET10(K, N, T, TT, V) CHK##K(N, T, TT, 10)
+#define GET12(K, N, T, TT, V) GET11(K, N, T, TT, V) CHK##K(N, T, TT, 11)
+#define GET13(K, N, T, TT, V) GET12(K, N, T, TT, V) CHK##K(N, T, TT, 12)
+#define GET14(K, N, T, TT, V) GET13(K, N, T, TT, V) CHK##K(N, T, TT, 13)
+#define GET15(K, N, T, TT, V) GET14(K, N, T, TT, V) CHK##K(N, T, TT, 14)
+#define GET16(K, N, T, TT, V) GET15(K, N, T, TT, V) CHK##K(N, T, TT, 15)
-#define GET_c(N, M) GET##N( , c##N, _c, M)
-#define GET_uc(N, M) GET##N( , uc##N, _uc, M)
-#define GET_s(N, M) GET##N( , s##N, _s, M)
-#define GET_us(N, M) GET##N( , us##N, _us, M)
-#define GET_i(N, M) GET##N( , i##N, _i, M)
-#define GET_ui(N, M) GET##N( , ui##N, _ui, M)
-#define GET_l(N, M) GET##N( , l##N, _l, M)
-#define GET_f(N, M) GET##N(F, f##N, _f, M)
-#define GET_d(N, M) GET##N(F, d##N, _d, M)
+#define GET_c(N, M) GET##N( , c##N, _c, _c, M)
+#define GET_uc(N, M) GET##N( , uc##N, _uc, _c, M)
+#define GET_s(N, M) GET##N( , s##N, _s, _s, M)
+#define GET_us(N, M) GET##N( , us##N, _us, _s, M)
+#define GET_i(N, M) GET##N( , i##N, _i, _i, M)
+#define GET_ui(N, M) GET##N( , ui##N, _ui, _i, M)
+#define GET_l(N, M) GET##N( , l##N, _l, _l, M)
+#define GET_f(N, M) GET##N(F, f##N, _f, _f, M)
+#define GET_d(N, M) GET##N(F, d##N, _d, _d, M)
-#define PUSH( T, V) pushargi V
+#define PUSH( T, V) pushargi##T V
#define PUSHF( T, V) pushargi##T V
#define PUSH0( K, T) /**/
#define PUSH1( K, T) PUSH##K(T, 0)
ret \
epilog
-#define DEFN(N, M, T) \
+#define DEFN(N, M, T, TT) \
name test##T##_##N \
test##T##_##N: \
prolog \
arg $argp \
/* stack buffer in %v0 */ \
getarg %v0 $argp \
- ARG##T(N) \
+ ARG##TT(N) \
/* validate arguments */ \
GET##T(N, M) \
/* heap buffer in %v1 */ \
ret \
epilog
-#define DEF( T) \
+#define DEF( T, TT) \
DEF0( T) \
- DEFN( 1, 0, T) \
- DEFN( 2, 1, T) \
- DEFN( 3, 2, T) \
- DEFN( 4, 3, T) \
- DEFN( 5, 4, T) \
- DEFN( 6, 5, T) \
- DEFN( 7, 6, T) \
- DEFN( 8, 7, T) \
- DEFN( 9, 8, T) \
- DEFN(10, 9, T) \
- DEFN(11, 10, T) \
- DEFN(12, 11, T) \
- DEFN(13, 12, T) \
- DEFN(14, 13, T) \
- DEFN(15, 14, T) \
- DEFN(16, 15, T) \
+ DEFN( 1, 0, T, TT) \
+ DEFN( 2, 1, T, TT) \
+ DEFN( 3, 2, T, TT) \
+ DEFN( 4, 3, T, TT) \
+ DEFN( 5, 4, T, TT) \
+ DEFN( 6, 5, T, TT) \
+ DEFN( 7, 6, T, TT) \
+ DEFN( 8, 7, T, TT) \
+ DEFN( 9, 8, T, TT) \
+ DEFN(10, 9, T, TT) \
+ DEFN(11, 10, T, TT) \
+ DEFN(12, 11, T, TT) \
+ DEFN(13, 12, T, TT) \
+ DEFN(14, 13, T, TT) \
+ DEFN(15, 14, T, TT) \
+ DEFN(16, 15, T, TT) \
DEFX(T)
#define CALL(T) calli test##T##_17
FILLF(_f)
FILLF(_d)
- DEF(_c)
- DEF(_uc)
- DEF(_s)
- DEF(_us)
- DEF(_i)
+ DEF(_c, _c)
+ DEF(_uc, _c)
+ DEF(_s, _s)
+ DEF(_us, _s)
+ DEF(_i, _i)
#if __WORDSIZE == 64
- DEF(_ui)
- DEF(_l)
+ DEF(_ui, _i)
+ DEF(_l, _l)
#endif
- DEF(_f)
- DEF(_d)
+ DEF(_f, _f)
+ DEF(_d, _d)
name main
main:
dnl
-dnl Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
dnl
dnl This file is part of GNU lightning.
dnl
dnl
AC_PREREQ([2.71])
-AC_INIT([GNU lightning],[2.1.3],[pcpa@gnu.org],[lightning])
+AC_INIT([GNU lightning],[2.2.1],[pcpa@gnu.org],[lightning])
AC_CONFIG_AUX_DIR([build-aux])
AC_CANONICAL_TARGET
AC_CONFIG_SRCDIR([Makefile.am])
fi ;;
*) ;;
esac ;;
+ aarch64)
+ case "$host_os" in
+ darwin*)
+ LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DPACKED_STACK=1" ;;
+ *) ;;
+ esac ;;
*) ;;
esac
LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEVEL_DISASSEMBLER=1"
fi
+# This option is only useful during development.
+AC_ARG_ENABLE(devel-get-jit-size,
+ AS_HELP_STRING([--enable-devel-get-jit-size],
+ [Devel mode to regenerate jit size information]),
+ [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
+AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+
AC_ARG_ENABLE(assertions,
AS_HELP_STRING([--enable-assertions],
[Enable runtime code generation assertions]),
[DEBUG=$enableval], [DEBUG=auto])
-if test "x$DEBUG" = xyes; then
+
+# This option might be made default in the future.
+# Currently it is only useful to ensure existing code will work
+# if PACKED_STACK is also defined.
+AC_ARG_ENABLE(devel-strong-type-checking,
+ AS_HELP_STRING([--enable-devel-strong-type-checking],
+ [Devel mode for strong type checking]),
+ [STRONG_TYPE_CHECKING=$enableval], [STRONG_TYPE_CHECKING=no])
+# Strong type checking is done with assert(), so it also forces -DDEBUG=1.
+# Two test commands joined by || are used because "test ... -o ..." is
+# not portable across shells.
+if test "x$DEBUG" = xyes || test "x$STRONG_TYPE_CHECKING" = xyes; then
LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
else
LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
DEBUG=no
fi
+AM_CONDITIONAL(strong_type_checking, [test "x$STRONG_TYPE_CHECKING" = xyes])
-# This option is only useful during development.
-AC_ARG_ENABLE(devel-get-jit-size,
- AS_HELP_STRING([--enable-devel-get-jit-size],
- [Devel mode to regenerate jit size information]),
- [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
-AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+AC_CHECK_LIB(dl, dlopen, [HAVE_LIBDL="yes"])
+AC_CHECK_LIB(dld, dlopen, [HAVE_LIBDLD="yes"])
-case "$host_os" in
- *bsd*|osf*) SHLIB="" ;;
- *hpux*) SHLIB="-ldld" ;;
- *) SHLIB="-ldl" ;;
-esac
+if test "x$HAVE_LIBDL" = xyes; then # dlopen was found in libdl: link with it
+ SHLIB="-ldl";
+elif test "x$HAVE_LIBDLD" = xyes; then # otherwise fall back to libdld if it has dlopen
+ SHLIB="-ldld";
+else # neither library provides dlopen: add no extra library
+ SHLIB="";
+fi
AC_SUBST(SHLIB)
cpu=
int main(void) {
int ac, flags;
unsigned int eax, ebx, ecx, edx;
- if (__WORDSIZE == 64)
+ if (sizeof(long) == 8)
return 1;
__asm__ volatile ("pushfl;\n\t"
"popl %0;\n\t"
#
-# Copyright 2012-2022 Free Software Foundation, Inc.
+# Copyright 2012-2023 Free Software Foundation, Inc.
#
# This file is part of GNU lightning.
#
# License for more details.
#
-AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include \
+ -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
info_TEXINFOS = lightning.texi
MOSTLYCLEANFILES = lightning.tmp
./configure
@end example
-@lightning{} supports the @code{--enable-disassembler} option, that
-enables linking to GNU binutils and optionally print human readable
+The @file{configure} script accepts the @code{--enable-disassembler} option,
+which enables linking to GNU binutils and optionally printing human readable
disassembly of the jit code. This option can be disabled by the
@code{--disable-disassembler} option.
-Another option that @file{configure} accepts is
-@code{--enable-assertions}, which enables several consistency checks in
-the run-time assemblers. These are not usually needed, so you can
-decide to simply forget about it; also remember that these consistency
+@file{configure} also accepts the @code{--enable-devel-disassembler}
+option, useful to check exactly what machine instructions were generated
+for a @lightning{} instruction. It basically mixes @code{jit_print} and
+@code{jit_disassembly}.
+
+The @code{--enable-assertions} option enables several consistency
+checks in the run-time assemblers. These are not usually needed, so you
+can decide to simply forget about it; also remember that these consistency
checks tend to slow down your code generator.
+The @code{--enable-devel-strong-type-checking} option does extra type
+checking using @code{assert}. This option also enables
+@code{--enable-assertions} unless it is explicitly disabled.
+
+The option @code{--enable-devel-get-jit-size} should only be used
+when doing updates or maintenance to lightning. It regenerates the
+@code{jit_$ARCH-sz.c} file, creating a table of maximum byte usage when
+translating a @lightning{} instruction to machine code.
+
After you've configured @lightning{}, run @file{make} as usual.
@lightning{} has an extensive set of tests to validate it is working
@example
negr _f _d O1 = -O2
comr O1 = ~O2
+clor O1 = number of leading one bits
+clzr O1 = number of leading zero bits
+ctor O1 = number of trailing one bits
+ctzr O1 = number of trailing zero bits
@end example
+Note that @code{ctzr} is basically equivalent to the @code{C} function
+@code{ffs}, but indexed at bit zero, not one.
+
+Contrary to @code{__builtin_ctz} and @code{__builtin_clz}, an input
+value of zero is not an error, it just returns the number of bits
+in a word, 64 if @lightning{} generates 64 bit instructions, otherwise
+it returns 32.
+
+The @code{clor} and @code{ctor} are just counterparts of the versions
+that search for zero bits.
+
These unary ALU operations are only defined for float operands.
@example
absr _f _d O1 = fabs(O2)
-sqrtr O1 = sqrt(O2)
+sqrtr _f _d O1 = sqrt(O2)
@end example
Besides requiring the @code{r} modifier, there are no unary operations
both cases, the first can be either a register or an immediate
value. Values are sign-extended to fit a whole register.
@example
-str _c _uc _s _us _i _ui _l _f _d *O1 = O2
-sti _c _uc _s _us _i _ui _l _f _d *O1 = O2
-stxr _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3
-stxi _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3
+str _c _s _i _l _f _d *O1 = O2
+sti _c _s _i _l _f _d *O1 = O2
+stxr _c _s _i _l _f _d *(O1+O2) = O3
+stxi _c _s _i _l _f _d *(O1+O2) = O3
@end example
-As for the load operations, the @code{_ui} and @code{_l} types are
-only available in 64-bit architectures, and for convenience, there
-is a version without a type modifier for integer or pointer operands
-that uses the appropriate wordsize call.
+Note that the unsigned type modifier is not available, as the store
+only writes 1, 2, 4 or 8 bytes to the memory address.
+The @code{_l} type is only available in 64-bit architectures, and for
+convenience, there is a version without a type modifier for integer or
+pointer operands that uses the appropriate wordsize call.
@item Argument management
These are:
@example
prepare (not specified)
va_start (not specified)
-pushargr _f _d
-pushargi _f _d
+pushargr _c _uc _s _us _i _ui _l _f _d
+pushargi _c _uc _s _us _i _ui _l _f _d
va_push (not specified)
-arg _f _d
+arg _c _uc _s _us _i _ui _l _f _d
getarg _c _uc _s _us _i _ui _l _f _d
va_arg _d
-putargr _f _d
-putargi _f _d
+putargr _c _uc _s _us _i _ui _l _f _d
+putargi _c _uc _s _us _i _ui _l _f _d
ret (not specified)
-retr _f _d
+retr _c _uc _s _us _i _ui _l _f _d
+reti _c _uc _s _us _i _ui _l _f _d
reti _f _d
va_end (not specified)
retval _c _uc _s _us _i _ui _l _f _d
left to right order}; and use @code{finish} or @code{call} (explained below)
to perform the actual call.
+Note that @code{arg}, @code{pusharg}, @code{putarg} and @code{ret}, when
+handling integer types, can be used without a type modifier.
+It is suggested to use matching type modifiers for @code{arg}, @code{putarg}
+and @code{getarg}; otherwise problems will happen when generating jit for
+environments that require arguments to be truncated and zero or sign
+extended by the caller, and/or where excess arguments might be passed packed
+in the stack. Currently only Apple systems with @code{aarch64} cpus are
+known to have this restriction.
+
@code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
@code{va_push} is required when passing a @code{va_list} to another function,
bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow}
@end example
+Note that the @code{C} code does not have an @code{O1} argument. It is
+required to always use the return value as an argument to @code{patch},
+@code{patch_at} or @code{patch_abs}.
+
@item Jump and return operations
These accept one argument except @code{ret} and @code{jmpi} which
have none; the difference between @code{finishi} and @code{calli}
align (not specified) @r{align code}
@end example
+Similar to @code{align} is the next instruction, also usually used with
+a label:
+@example
+skip (not specified) @r{skip code}
+@end example
+It is used to specify a minimal number of bytes of nops to be inserted
+before the next instruction.
+
@code{label} is normally used as @code{patch_at} argument for backward
jumps.
assert(addr2 - addr1 == 16); @rem{/* only one of the addresses needs to be remembered */}
@end example
+@code{skip} is useful for reserving space in the code buffer that can
+later be filled (possibly with the help of the pair of functions
+@code{jit_unprotect} and @code{jit_protect}).
+
@item Function prolog
These macros are used to set up a function prolog. The @code{allocai}
is useful to know the live range of register arguments, as those
are very fast to read and write, but have volatile values.
-@code{callee_save_p} exects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
+@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
@code{JIT_Fn}, and will return non zero if the register is callee
save. This call is useful because on several ports, the @code{JIT_Rn}
and @code{JIT_Fn} registers are actually callee save; no need
@table @b
@item x86_64
@example
- sub $0x30,%rsp
- mov %rbp,(%rsp)
- mov %rsp,%rbp
- sub $0x18,%rsp
- mov %rdi,%rax mov %rdi, %rax
- add $0x1,%rax inc %rax
- mov %rbp,%rsp
- mov (%rsp),%rbp
- add $0x30,%rsp
- retq retq
+ mov %rdi,%rax
+ add $0x1,%rax
+ ret
@end example
-In this case, the main overhead is due to the function's prolog and
-epilog, and stack alignment after reserving stack space for word
-to/from float conversions or moving data from/to x87 to/from SSE.
-Note that besides allocating space to save callee saved registers,
-no registers are saved/restored because @lightning{} notices those
-registers are not modified. There is currently no logic to detect
-if it needs to allocate stack space for type conversions neither
-proper leaf function detection, but these are subject to change
-(FIXME).
+In this case, for the x86 port, @lightning{} has simple optimizations
+to understand it is a leaf function, and that it is not required to
+create a stack frame nor update the stack pointer.
@end table
@node printf
in = jit_arg();
stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
- jit_getarg_i(JIT_R2, in);
+ jit_getarg(JIT_R2, in);
while (*expr) @{
char buf[32];
counterpart, it is an error to pass @code{NULL} pointers as arguments.
@end deftypefun
+@section Protection
+Unless an alternate code buffer is used (see below), @code{jit_emit}
+sets the access protections so that the code buffer's memory can be read and
+executed, but not modified. One can use the following functions after
+@code{jit_emit} but before @code{jit_clear} to temporarily lift the
+protection:
+
+@deftypefun void jit_unprotect ()
+Changes the access protection so that the code buffer's memory can be read and
+modified. Before the emitted code can be invoked, @code{jit_protect}
+has to be called to reset the change.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
+@deftypefun void jit_protect ()
+Changes the access protection so that the code buffer's memory can be read and
+executed.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
@section Alternate code buffer
To instruct @lightning{} to use an alternate code buffer it is required
to call @code{jit_realize} before @code{jit_emit}, and then query states
fn = jit_note(NULL, 0);
jit_prolog();
- in = jit_arg();
+ in = jit_arg_i();
stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
jit_getarg_i(JIT_R2, in);
#
-# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
#
# This file is part of GNU lightning.
#
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define jit_align(u) jit_new_node_w(jit_code_align, u)
jit_code_live, jit_code_align,
jit_code_save, jit_code_load,
+#define jit_skip(u) jit_new_node_w(jit_code_skip, u)
+ jit_code_skip,
#define jit_name(u) _jit_name(_jit,u)
jit_code_name,
#define jit_note(u, v) _jit_note(_jit, u, v)
#define jit_allocar(u, v) _jit_allocar(_jit,u,v)
jit_code_allocai, jit_code_allocar,
-#define jit_arg() _jit_arg(_jit)
- jit_code_arg,
+#define jit_arg_c() _jit_arg(_jit, jit_code_arg_c) /* typed forms pass the matching jit_code_arg_* to _jit_arg */
+#define jit_arg_s() _jit_arg(_jit, jit_code_arg_s)
+#define jit_arg_i() _jit_arg(_jit, jit_code_arg_i)
+# if __WORDSIZE == 32
+# define jit_arg() jit_arg_i() /* untyped form is the word-sized one: _i when __WORDSIZE is 32 */
+#else
+# define jit_arg_l() _jit_arg(_jit, jit_code_arg_l) /* jit_arg_l is only defined when __WORDSIZE is not 32 */
+# define jit_arg() jit_arg_l() /* untyped form is the word-sized one: _l otherwise */
+#endif
+ jit_code_arg_c, jit_code_arg_s,
+ jit_code_arg_i, jit_code_arg_l,
+#if __WORDSIZE == 32
+# define jit_code_arg jit_code_arg_i /* jit_code_arg is an alias of the word-sized code, not an enumerator */
+#else
+# define jit_code_arg jit_code_arg_l
+#endif
+
#define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v)
#define jit_getarg_uc(u,v) _jit_getarg_uc(_jit,u,v)
- jit_code_getarg_c, jit_code_getarg_uc,
#define jit_getarg_s(u,v) _jit_getarg_s(_jit,u,v)
#define jit_getarg_us(u,v) _jit_getarg_us(_jit,u,v)
- jit_code_getarg_s, jit_code_getarg_us,
#define jit_getarg_i(u,v) _jit_getarg_i(_jit,u,v)
#if __WORDSIZE == 32
# define jit_getarg(u,v) jit_getarg_i(u,v)
#else
-# define jit_getarg(u,v) jit_getarg_l(u,v)
# define jit_getarg_ui(u,v) _jit_getarg_ui(_jit,u,v)
# define jit_getarg_l(u,v) _jit_getarg_l(_jit,u,v)
+# define jit_getarg(u,v) jit_getarg_l(u,v)
#endif
+ jit_code_getarg_c, jit_code_getarg_uc,
+ jit_code_getarg_s, jit_code_getarg_us,
jit_code_getarg_i, jit_code_getarg_ui,
jit_code_getarg_l,
-# define jit_putargr(u,v) _jit_putargr(_jit,u,v)
-# define jit_putargi(u,v) _jit_putargi(_jit,u,v)
- jit_code_putargr, jit_code_putargi,
+#if __WORDSIZE == 32
+# define jit_code_getarg jit_code_getarg_i
+#else
+# define jit_code_getarg jit_code_getarg_l
+#endif
+
+#define jit_putargr_c(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_c)
+#define jit_putargi_c(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_c)
+#define jit_putargr_uc(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_uc)
+#define jit_putargi_uc(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_uc)
+#define jit_putargr_s(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_s)
+#define jit_putargi_s(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_s)
+#define jit_putargr_us(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_us)
+#define jit_putargi_us(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_us)
+#define jit_putargr_i(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_i)
+#define jit_putargi_i(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_i)
+#if __WORDSIZE == 32
+# define jit_putargr(u,v) jit_putargr_i(u,v)
+# define jit_putargi(u,v) jit_putargi_i(u,v)
+#else
+# define jit_putargr_ui(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_ui)
+# define jit_putargi_ui(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_ui)
+# define jit_putargr_l(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_l)
+# define jit_putargi_l(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_l)
+# define jit_putargr(u,v) jit_putargr_l(u,v)
+# define jit_putargi(u,v) jit_putargi_l(u,v)
+#endif
+ jit_code_putargr_c, jit_code_putargi_c,
+ jit_code_putargr_uc, jit_code_putargi_uc,
+ jit_code_putargr_s, jit_code_putargi_s,
+ jit_code_putargr_us, jit_code_putargi_us,
+ jit_code_putargr_i, jit_code_putargi_i,
+ jit_code_putargr_ui, jit_code_putargi_ui,
+ jit_code_putargr_l, jit_code_putargi_l,
+#if __WORDSIZE == 32
+# define jit_code_putargr jit_code_putargr_i
+# define jit_code_putargi jit_code_putargi_i
+#else
+# define jit_code_putargr jit_code_putargr_l
+# define jit_code_putargi jit_code_putargi_l
+#endif
#define jit_va_start(u) jit_new_node_w(jit_code_va_start, u)
jit_code_va_start,
#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w)
jit_code_movnr, jit_code_movzr,
+ jit_code_casr, jit_code_casi,
+#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
#define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v)
#define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v)
jit_code_extr_c, jit_code_extr_uc,
#endif
jit_code_extr_i, jit_code_extr_ui,
+#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v)
+ jit_code_bswapr_us,
+#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+ jit_code_bswapr_ui,
+#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+ jit_code_bswapr_ul,
+#if __WORDSIZE == 32
+#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#else
+#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+#endif
+
#define jit_htonr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v)
#define jit_ntohr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v)
jit_code_htonr_us,
#define jit_prepare() _jit_prepare(_jit)
jit_code_prepare,
-#define jit_pushargr(u) _jit_pushargr(_jit,u)
-#define jit_pushargi(u) _jit_pushargi(_jit,u)
- jit_code_pushargr, jit_code_pushargi,
+
+#define jit_pushargr_c(u) _jit_pushargr(_jit,u,jit_code_pushargr_c)
+#define jit_pushargi_c(u) _jit_pushargi(_jit,u,jit_code_pushargi_c)
+#define jit_pushargr_uc(u) _jit_pushargr(_jit,u,jit_code_pushargr_uc)
+#define jit_pushargi_uc(u) _jit_pushargi(_jit,u,jit_code_pushargi_uc)
+#define jit_pushargr_s(u) _jit_pushargr(_jit,u,jit_code_pushargr_s)
+#define jit_pushargi_s(u) _jit_pushargi(_jit,u,jit_code_pushargi_s)
+#define jit_pushargr_us(u) _jit_pushargr(_jit,u,jit_code_pushargr_us)
+#define jit_pushargi_us(u) _jit_pushargi(_jit,u,jit_code_pushargi_us)
+#define jit_pushargr_i(u) _jit_pushargr(_jit,u,jit_code_pushargr_i)
+#define jit_pushargi_i(u) _jit_pushargi(_jit,u,jit_code_pushargi_i)
+#if __WORDSIZE == 32
+# define jit_pushargr(u) jit_pushargr_i(u)
+# define jit_pushargi(u) jit_pushargi_i(u)
+#else
+# define jit_pushargr_ui(u) _jit_pushargr(_jit,u,jit_code_pushargr_ui)
+# define jit_pushargi_ui(u) _jit_pushargi(_jit,u,jit_code_pushargi_ui)
+# define jit_pushargr_l(u) _jit_pushargr(_jit,u,jit_code_pushargr_l)
+# define jit_pushargi_l(u) _jit_pushargi(_jit,u,jit_code_pushargi_l)
+# define jit_pushargr(u) jit_pushargr_l(u)
+# define jit_pushargi(u) jit_pushargi_l(u)
+#endif
+ jit_code_pushargr_c, jit_code_pushargi_c,
+ jit_code_pushargr_uc, jit_code_pushargi_uc,
+ jit_code_pushargr_s, jit_code_pushargi_s,
+ jit_code_pushargr_us, jit_code_pushargi_us,
+ jit_code_pushargr_i, jit_code_pushargi_i,
+ jit_code_pushargr_ui, jit_code_pushargi_ui,
+ jit_code_pushargr_l, jit_code_pushargi_l,
+#if __WORDSIZE == 32
+# define jit_code_pushargr jit_code_pushargr_i
+# define jit_code_pushargi jit_code_pushargi_i
+#else
+# define jit_code_pushargr jit_code_pushargr_l
+# define jit_code_pushargi jit_code_pushargi_l
+#endif
+
#define jit_finishr(u) _jit_finishr(_jit,u)
#define jit_finishi(u) _jit_finishi(_jit,u)
jit_code_finishr, jit_code_finishi,
#define jit_ret() _jit_ret(_jit)
jit_code_ret,
-#define jit_retr(u) _jit_retr(_jit,u)
-#define jit_reti(u) _jit_reti(_jit,u)
- jit_code_retr, jit_code_reti,
+
+#define jit_retr_c(u) _jit_retr(_jit,u,jit_code_retr_c)
+#define jit_reti_c(u) _jit_reti(_jit,u,jit_code_reti_c)
+#define jit_retr_uc(u) _jit_retr(_jit,u,jit_code_retr_uc)
+#define jit_reti_uc(u) _jit_reti(_jit,u,jit_code_reti_uc)
+#define jit_retr_s(u) _jit_retr(_jit,u,jit_code_retr_s)
+#define jit_reti_s(u) _jit_reti(_jit,u,jit_code_reti_s)
+#define jit_retr_us(u) _jit_retr(_jit,u,jit_code_retr_us)
+#define jit_reti_us(u) _jit_reti(_jit,u,jit_code_reti_us)
+#define jit_retr_i(u) _jit_retr(_jit,u,jit_code_retr_i)
+#define jit_reti_i(u) _jit_reti(_jit,u,jit_code_reti_i)
+#if __WORDSIZE == 32
+# define jit_retr(u) jit_retr_i(u)
+# define jit_reti(u) jit_reti_i(u)
+#else
+# define jit_retr_ui(u) _jit_retr(_jit,u,jit_code_retr_ui)
+# define jit_reti_ui(u) _jit_reti(_jit,u,jit_code_reti_ui)
+# define jit_retr_l(u) _jit_retr(_jit,u,jit_code_retr_l)
+# define jit_reti_l(u) _jit_reti(_jit,u,jit_code_reti_l)
+# define jit_retr(u) jit_retr_l(u)
+# define jit_reti(u) jit_reti_l(u)
+#endif
+ jit_code_retr_c, jit_code_reti_c,
+ jit_code_retr_uc, jit_code_reti_uc,
+ jit_code_retr_s, jit_code_reti_s,
+ jit_code_retr_us, jit_code_reti_us,
+ jit_code_retr_i, jit_code_reti_i,
+ jit_code_retr_ui, jit_code_reti_ui,
+ jit_code_retr_l, jit_code_reti_l,
+#if __WORDSIZE == 32
+# define jit_code_retr jit_code_retr_i
+# define jit_code_reti jit_code_reti_i
+#else
+# define jit_code_retr jit_code_retr_l
+# define jit_code_reti jit_code_reti_l
+#endif
+
#define jit_retval_c(u) _jit_retval_c(_jit,u)
#define jit_retval_uc(u) _jit_retval_uc(_jit,u)
- jit_code_retval_c, jit_code_retval_uc,
#define jit_retval_s(u) _jit_retval_s(_jit,u)
#define jit_retval_us(u) _jit_retval_us(_jit,u)
- jit_code_retval_s, jit_code_retval_us,
#define jit_retval_i(u) _jit_retval_i(_jit,u)
#if __WORDSIZE == 32
# define jit_retval(u) jit_retval_i(u)
#else
-# define jit_retval(u) jit_retval_l(u)
# define jit_retval_ui(u) _jit_retval_ui(_jit,u)
# define jit_retval_l(u) _jit_retval_l(_jit,u)
+# define jit_retval(u) jit_retval_l(u)
#endif
+ jit_code_retval_c, jit_code_retval_uc,
+ jit_code_retval_s, jit_code_retval_us,
jit_code_retval_i, jit_code_retval_ui,
jit_code_retval_l,
+#if __WORDSIZE == 32
+# define jit_code_retval jit_code_retval_i
+#else
+# define jit_code_retval jit_code_retval_l
+#endif
#define jit_epilog() _jit_epilog(_jit)
jit_code_epilog,
#define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v)
#define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v)
-#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v)
- jit_code_bswapr_us,
-#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
- jit_code_bswapr_ui,
-#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
- jit_code_bswapr_ul,
-#if __WORDSIZE == 32
-#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
-#else
-#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
-#endif
+#define jit_clor(u,v) jit_new_node_ww(jit_code_clor,u,v)
+#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v)
+ jit_code_clor, jit_code_clzr,
- jit_code_casr, jit_code_casi,
-#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x)
-#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x)
+#define jit_ctor(u,v) jit_new_node_ww(jit_code_ctor,u,v)
+#define jit_ctzr(u,v) jit_new_node_ww(jit_code_ctzr,u,v)
+ jit_code_ctor, jit_code_ctzr,
jit_code_last_code
} jit_code_t;
extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
extern void _jit_ellipsis(jit_state_t*);
-extern jit_node_t *_jit_arg(jit_state_t*);
+extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t);
+
extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
#endif
-extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*);
+
+extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t);
+extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t);
extern void _jit_prepare(jit_state_t*);
extern void _jit_ellipsis(jit_state_t*);
extern void _jit_va_push(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargr(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargi(jit_state_t*, jit_word_t);
+
+extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t);
+
extern void _jit_finishr(jit_state_t*, jit_gpr_t);
extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
extern void _jit_ret(jit_state_t*);
-extern void _jit_retr(jit_state_t*, jit_gpr_t);
-extern void _jit_reti(jit_state_t*, jit_word_t);
+
+extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t);
+
extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
#endif
+
extern void _jit_epilog(jit_state_t*);
#define jit_patch(u) _jit_patch(_jit,u)
extern void _jit_tramp(jit_state_t*, jit_int32_t);
#define jit_emit() _jit_emit(_jit)
extern jit_pointer_t _jit_emit(jit_state_t*);
+#define jit_unprotect() _jit_unprotect(_jit)
+extern void _jit_unprotect(jit_state_t*);
+#define jit_protect() _jit_protect(_jit)
+extern void _jit_protect(jit_state_t*);
#define jit_print() _jit_print(_jit)
extern void _jit_print(jit_state_t*);
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define JIT_HASH_CONSTS 0
#define JIT_NUM_OPERANDS 3
+#if __APPLE__
+# define PACKED_STACK 1
+#endif
+
/*
* Types
*/
/*
- * Copyright (C) 2014-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
typedef struct {
jit_uint32_t version : 4;
+ /* this field originally was only used for the 'e' in armv5te.
+ * it can also be used to force hardware division, if setting
+ * version to 7, telling it is armv7r or better. */
jit_uint32_t extend : 1;
/* only generate thumb instructions for thumb2 */
jit_uint32_t thumb : 1;
* due to some memory ordering constraint not being respected, so,
* disable by default */
jit_uint32_t ldrt_strt : 1;
+ /* assume functions called never match jit instruction set?
+ * that is libc, gmp, mpfr, etc functions are in thumb mode and jit
+ * is in arm mode, or the reverse, what may cause a crash upon return
+ * of that function if generating jit for a relative jump.
+ */
+ jit_uint32_t exchange : 1;
} jit_cpu_t;
/*
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
_NOREG,
} jit_reg_t;
+typedef struct {
+ jit_uint32_t clz : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t jit_cpu;
+
#endif /* _jit_ia64_h */
/*
- * Copyright (C) 2022 Free Software Foundation, Inc.
+ * Copyright (C) 2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if _MIPS_SIM != _ABIO32
# define NEW_ABI 1
+#else
+# define NEW_ABI 0
#endif
/*
_NOREG,
} jit_reg_t;
+typedef struct {
+ jit_uint32_t release : 4;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t jit_cpu;
+
#endif /* _jit_mips_h */
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define JIT_HASH_CONSTS 1
#define JIT_NUM_OPERANDS 3
+#if defined(_AIX) && !defined(_CALL_AIX) && !defined(_CALL_LINUX)
+# define _CALL_AIXDESC 1
+#endif
/*
* Types
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define HIDDEN /**/
#endif
+#if PACKED_STACK || STRONG_TYPE_CHECKING /* strict variant: the argument code must agree exactly with the expected one */
+# define assert_arg_type(code, expect) \
+ do assert((code) == (expect)); while (0)
+# define assert_putarg_type(code, expect) \
+ do \
+ assert((((code) - jit_code_putargr_c) >> 2) == \
+ ((expect) - jit_code_arg_c)); \
+ while (0) /* >> 2 maps each run of four consecutive putarg codes onto one index, compared with the arg code's offset from jit_code_arg_c */
+#else /* relaxed variant: the untyped jit_code_arg is accepted as well */
+# define assert_arg_type(code, expect) \
+ do assert((int)(code) == (int)(expect) || \
+ (code) == jit_code_arg); while (0)
+# define assert_putarg_type(code, expect) \
+ do \
+ assert(((((code) - jit_code_putargr_c) >> 2) == \
+ ((expect) - jit_code_arg_c)) || \
+ ((code) == jit_code_arg)); \
+ while (0) /* NOTE(review): code is offset from jit_code_putargr_c above, so comparing it with jit_code_arg looks like it can never match - confirm whether (expect) was intended */
+#endif
+
#define rc(value) jit_class_##value
#define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec))
(!jit_regset_tstbit(&_jitc->regarg, regno) && \
!jit_regset_tstbit(&_jitc->regsav, regno))
-#define jit_inc_synth(code) \
+#define jit_code_inc_synth(code) /* code is handed to jit_new_node unchanged */ \
do { \
- (void)jit_new_node(jit_code_##code); \
+ (void)jit_new_node(code); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_w(code, u) \
+#define jit_inc_synth(name) \
+ jit_code_inc_synth(jit_code_##name) /* name-based form: pastes the jit_code_ prefix onto name */
+#define jit_code_inc_synth_w(code, u) \
do { \
- (void)jit_new_node_w(jit_code_##code, u); \
+ (void)jit_new_node_w(code, u); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_f(code, u) \
+#define jit_inc_synth_w(name, u) \
+ jit_code_inc_synth_w(jit_code_##name, u) /* name-based form of jit_code_inc_synth_w */
+#define jit_code_inc_synth_f(code, u) \
do { \
- (void)jit_new_node_f(jit_code_##code, u); \
+ (void)jit_new_node_f(code, u); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_d(code, u) \
+#define jit_inc_synth_f(name, u) \
+ jit_code_inc_synth_f(jit_code_##name, u) /* name-based form of jit_code_inc_synth_f */
+#define jit_code_inc_synth_d(code, u) \
do { \
- (void)jit_new_node_d(jit_code_##code, u); \
+ (void)jit_new_node_d(code, u); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_ww(code, u, v) \
+#define jit_inc_synth_d(name, u) \
+ jit_code_inc_synth_d(jit_code_##name, u) /* name-based form of jit_code_inc_synth_d */
+#define jit_code_inc_synth_ww(code, u, v) \
do { \
- (void)jit_new_node_ww(jit_code_##code, u, v); \
+ (void)jit_new_node_ww(code, u, v); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_wp(code, u, v) \
+#define jit_inc_synth_ww(name, u, v) \
+ jit_code_inc_synth_ww(jit_code_##name, u, v) /* name-based form of jit_code_inc_synth_ww */
+#define jit_code_inc_synth_wp(code, u, v) \
do { \
- (void)jit_new_node_wp(jit_code_##code, u, v); \
+ (void)jit_new_node_wp(code, u, v); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_fp(code, u, v) \
+#define jit_inc_synth_wp(name, u, v) \
+ jit_code_inc_synth_wp(jit_code_##name, u, v) /* name-based form of jit_code_inc_synth_wp */
+#define jit_code_inc_synth_fp(code, u, v) \
do { \
- (void)jit_new_node_fp(jit_code_##code, u, v); \
+ (void)jit_new_node_fp(code, u, v); \
jit_synth_inc(); \
} while (0)
-#define jit_inc_synth_dp(code, u, v) \
+#define jit_inc_synth_fp(name, u, v) \
+ jit_code_inc_synth_fp(jit_code_##name, u, v) /* name-based form of jit_code_inc_synth_fp */
+#define jit_code_inc_synth_dp(code, u, v) \
do { \
- (void)jit_new_node_dp(jit_code_##code, u, v); \
+ (void)jit_new_node_dp(code, u, v); \
jit_synth_inc(); \
} while (0)
+#define jit_inc_synth_dp(name, u, v) \
+ jit_code_inc_synth_dp(jit_code_##name, u, v) /* name-based form of jit_code_inc_synth_dp */
#define jit_dec_synth() jit_synth_dec()
+/* Push 'node' on the head of the current function's alist, a list
+ * chained through the node's link field. The argument is parenthesized
+ * so that any pointer expression can be passed; it is expanded twice,
+ * so it must not have side effects. */
+#define jit_link_alist(node) \
+ do { \
+ (node)->link = _jitc->function->alist; \
+ _jitc->function->alist = (node); \
+ } while (0)
+#define jit_check_frame() /* flag the current function as needing a frame; only the first call changes state */ \
+ do { \
+ if (!_jitc->function->need_frame) { \
+ _jitc->again = 1; /* NOTE(review): presumably requests another code generation pass - confirm where 'again' is read */ \
+ _jitc->function->need_frame = 1; \
+ } \
+ } while (0)
+#define jit_diffsize() (stack_framesize - _jitc->framesize) /* bytes by which the actual frame is smaller than the fixed stack_framesize */
+#define jit_framesize() (stack_framesize - jit_diffsize()) /* algebraically equal to _jitc->framesize */
+#define jit_selfsize() (_jitc->function->self.size - jit_diffsize()) /* self.size adjusted by the same difference */
+
#define jit_link_prolog() \
do { \
_jitc->tail->link = _jitc->function->prolog->link; \
#define jit_class_xpr 0x80000000 /* float / vector */
/* Used on sparc64 where %f0-%f31 can be encode for single float
* but %f32 to %f62 only as double precision */
-#define jit_class_sng 0x10000000 /* Single precision float */
-#define jit_class_dbl 0x20000000 /* Only double precision float */
+#define jit_class_sng 0x00010000 /* Single precision float */
+#define jit_class_dbl 0x00020000 /* Only double precision float */
#define jit_regno_patch 0x00008000 /* this is a register
* returned by a "user" call
* to jit_get_reg() */
} call;
jit_node_t *prolog;
jit_node_t *epilog;
+ jit_node_t *alist;
jit_int32_t *regoff;
jit_regset_t regset;
jit_int32_t stack;
+#if defined(__i386__) || defined(__x86_64__)
+ jit_int32_t cvt_offset; /* allocai'd offset for x87<->xmm or
+ * fpr<->gpr transfer using the stack */
+#endif
/* Helper for common jit generation pattern, used in GNU Smalltalk
* and possibly others, where a static frame layout is required or
jit_uint32_t define_frame : 1;
jit_uint32_t assume_frame : 1;
+ jit_uint32_t need_frame : 1; /* need frame pointer? */
+ jit_uint32_t need_stack : 1; /* need stack pointer? */
+ jit_uint32_t need_return : 1; /* not a leaf function */
+
/* alloca offset offset */
jit_int32_t aoffoff;
/* uses allocar flag */
jit_uint32_t allocar : 1;
+#if __arm__
+ /* Set if the function will, or might, use float registers and vfp is
+ * not available. Always use the first 64 bytes, as accesses to the virtual
+ * float registers use hardcoded instructions that can only reach 64 byte
+ * displacements, and to keep the code simpler, do not use temporaries. */
+ jit_uint32_t swf_offset : 1;
+ /* If need to call C functions for some operation, or variadic function */
+ jit_uint32_t save_reg_args : 1;
+#endif
+
/* varargs state offsets */
jit_int32_t vaoff; /* offset of jit_va_list */
jit_int32_t vagp; /* first gp va argument */
jit_int32_t rout; /* first output register */
jit_int32_t breg; /* base register for prolog/epilog */
#endif
+#if __mips__
+ struct {
+ jit_int32_t op; /* pending instruction, candidate
+ * to be inserted in a delay slot */
+ jit_bool_t pend; /* non zero if need to emit op */
+ } inst;
+#endif
#if __mips__ || __ia64__ || __alpha__ || \
(__sparc__ && __WORDSIZE == 64) || __riscv || __loongarch__
jit_int32_t carry;
#endif
jit_uint32_t no_data : 1;
jit_uint32_t no_note : 1;
+ jit_int32_t framesize; /* space for callee save registers,
+ * frame pointer and return address */
jit_int32_t reglen; /* number of registers */
jit_regset_t regarg; /* cannot allocate */
jit_regset_t regsav; /* automatic spill only once */
jit_regset_t reglive; /* known live registers at some point */
jit_regset_t regmask; /* register mask to update reglive */
+ jit_regset_t explive; /* explicitly marked as live */
struct {
jit_uint8_t *end;
} code;
struct {
jit_uint8_t *ptr;
jit_word_t length;
+ /* PROTECTED bytes starting at PTR are mprotect'd. */
+ jit_word_t protected;
} code;
struct {
jit_uint8_t *ptr;
/*
- * Copyright (C) 2019-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define JIT_NOREG _NOREG
} jit_reg_t;
+typedef struct { /* cpu feature flags for the s390 port */
+ jit_uint32_t flogr : 1; /* NOTE(review): presumably set when the FLOGR instruction can be used - confirm in jit_s390.c */
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t jit_cpu;
+
#endif /* _jit_s390_h */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
_NOREG,
} jit_reg_t;
+typedef struct { /* cpu feature flags for the sparc port */
+ jit_uint32_t lzcnt : 1; /* NOTE(review): presumably set when a leading-zero-count instruction can be used - confirm in jit_sparc.c */
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t jit_cpu;
+
#endif /* _jit_sparc_h */
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
jit_uint32_t avx : 1;
/* lahf/sahf available in 64 bits mode */
jit_uint32_t lahf : 1;
+ /* lzcnt and tzcnt? */
+ jit_uint32_t abm : 1;
+ /* adcx and adox instructions available? */
+ jit_uint32_t adx : 1;
} jit_cpu_t;
/*
AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
-D_GNU_SOURCE $(LIGHTNING_CFLAGS)
liblightning_LTLIBRARIES = liblightning.la
-liblightning_la_LDFLAGS = -version-info 1:0:0
+liblightning_la_LDFLAGS = -version-info 2:0:0
+AM_CPPFLAGS =
if get_jit_size
JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
-AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+AM_CPPFLAGS += -DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+endif
+if strong_type_checking
+AM_CPPFLAGS += -DSTRONG_TYPE_CHECKING=1
endif
liblightningdir = $(libdir)
liblightning_la_SOURCES = \
jit_disasm.c \
jit_memory.c \
- jit_names.c \
jit_note.c \
jit_print.c \
jit_size.c \
lightning.c
EXTRA_DIST = \
+ jit_names.c \
jit_fallback.c \
jit_rewind.c \
+ aarch64-logical-immediates.c \
jit_aarch64.c \
jit_aarch64-cpu.c \
jit_aarch64-fpu.c \
--- /dev/null
+// AArch64 Logical Immediate Encoding and Decoding
+//
+// I hereby place this code in the public domain, as per the terms of the
+// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/
+
+#include <stdint.h>
+#include <stdbool.h>
+
+static inline int nonzeroCountTrailingZeros64(uint64_t n) { // Trailing zero bit count; callers must pass n != 0 (__builtin_ctzll(0) is undefined).
+ return __builtin_ctzll(n);
+}
+
+static inline int countTrailingZeros64(uint64_t n) { // Trailing zero bit count, defined as 64 when n == 0.
+ return n ? nonzeroCountTrailingZeros64(n) : 64;
+}
+
+static inline int nonzeroCountLeadingZeros64(uint64_t n) { // Leading zero bit count of a 64-bit value; callers must pass n != 0 (__builtin_clzll(0) is undefined).
+ return __builtin_clzll(n);
+}
+
+static inline int nonzeroCountLeadingZeros32(uint32_t n) { // Leading zero bit count of a 32-bit value; callers must pass n != 0 (__builtin_clz(0) is undefined).
+ return __builtin_clz(n);
+}
+
+static inline uint64_t rotateRight64(uint64_t v, int n) { // Rotate v right by n bits (n taken modulo 64).
+ // return __builtin_rotateright64(v, n);
+ return (v >> (n & 63)) | (v << (-n & 63)); // both shift counts are masked to 0..63, so n == 0 never shifts by 64
+}
+
+static inline uint64_t clearTrailingOnes64(uint64_t n) { // Clears the contiguous run of one bits that starts at bit 0, if any.
+ return n & (n + 1); // n + 1 carries through the trailing ones, turning them into zeros
+}
+
+#define ENCODE_FAILED (-1)
+
+int encodeLogicalImmediate64(uint64_t val) { // Returns the 13-bit N:immr:imms encoding of val, or ENCODE_FAILED (-1) if val is not encodable.
+ // Consider an ARM64 logical immediate as a pattern of "o" ones preceded
+ // by "z" more-significant zeroes, repeated to fill a 64-bit integer.
+ // o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This
+ // part of the pattern is encoded in the fields "imms" and "N".
+ //
+ // "immr" encodes a further right rotate of the repeated pattern, allowing
+ // a wide range of useful bitwise constants to be represented.
+ //
+ // (The spec describes the "immr" rotate as rotating the "o + z" bit
+ // pattern before repeating it to fill 64-bits, but, as it's a repeating
+ // pattern, rotating afterwards is equivalent.)
+
+ // This encoding is not allowed to represent all-zero or all-one values.
+ if (val == 0 || ~val == 0)
+ return ENCODE_FAILED;
+
+ // To detect an immediate that may be encoded in this scheme, we first
+ // remove the right-rotate, by rotating such that the least significant
+ // bit is a one and the most significant bit is a zero.
+ //
+ // We do this by clearing any trailing one bits, then counting the
+ // trailing zeroes. This finds an "edge", where zero goes to one.
+ // We then rotate the original value right by that amount, moving
+ // the first one to the least significant bit.
+
+ int rotation = countTrailingZeros64(clearTrailingOnes64(val)); // 64 when val is only a run of trailing ones
+ uint64_t normalized = rotateRight64(val, rotation & 63); // "& 63" turns that 64 into "no rotation"
+
+ // Now we have normalized the value, and determined the rotation, we can
+ // determine "z" by counting the leading zeroes, and "o" by counting the
+ // trailing ones. (These will both be positive, as we already rejected 0
+ // and ~0, and rotated the value to start with a zero and end with a one.)
+
+ int zeroes = nonzeroCountLeadingZeros64(normalized);
+ int ones = nonzeroCountTrailingZeros64(~normalized);
+ int size = zeroes + ones;
+
+ // Detect the repeating pattern (by comparing every repetition to the
+ // one next to it, using rotate).
+
+ if (rotateRight64(val, size & 63) != val)
+ return ENCODE_FAILED;
+
+ // We do not need to further validate size to ensure it is a power of two
+ // between 2 and 64. The only "minimal" patterns that can repeat to fill a
+ // 64-bit value must have a length that is a factor of 64 (i.e. it is a
+ // power of two in the range [1,64]). And our pattern cannot be of length
+ // one (as we already rejected 0 and ~0).
+ //
+ // By "minimal" patterns I refer to patterns which do not themselves
+ // contain repetitions. For example, '010101' is a non-minimal pattern of
+ // a non-power-of-two length that can pass the above rotational test. It
+ // consists of the minimal pattern '01'. All our patterns are minimal, as
+ // they contain only one contiguous run of ones separated by at least one
+ // zero.
+
+ // Finally, we encode the values. "rotation" is the amount we rotated
+ // right by to "undo" the right-rotate encoded in immr, so must be
+ // negated.
+
+ // size 2: N=0 immr=00000r imms=11110s
+ // size 4: N=0 immr=0000rr imms=1110ss
+ // size 8: N=0 immr=000rrr imms=110sss
+ // size 16: N=0 immr=00rrrr imms=10ssss
+ // size 32: N=0 immr=0rrrrr imms=0sssss
+ // size 64: N=1 immr=rrrrrr imms=ssssss
+ int immr = -rotation & (size - 1);
+ int imms = -(size << 1) | (ones - 1); // high bits mark the element size (table above), low bits hold ones - 1
+ int N = (size >> 6); // 1 only when size == 64
+
+ return (N << 12) | (immr << 6) | (imms & 0x3f); // N at bit 12, immr at bits 11..6, imms at bits 5..0
+}
+
+int encodeLogicalImmediate32(uint32_t val) { // Encodes a 32-bit immediate by replicating it into both halves of a 64-bit value; same return convention as encodeLogicalImmediate64.
+ return encodeLogicalImmediate64(((uint64_t)val << 32) | val);
+}
+
+// Decoding!
+
+bool isValidLogicalImmediate64(unsigned val) { // val is an N:immr:imms encoding (N at bit 12, imms in bits 5..0); immr is not inspected.
+ unsigned N = (val >> 12) & 1;
+ unsigned imms = val & 0x3f;
+ unsigned pattern = (N << 6) | (~imms & 0x3f); // 7-bit value N:~imms
+ return (pattern & (pattern - 1)) != 0; // false when pattern is zero or has a single bit set
+}
+
+bool isValidLogicalImmediate32(unsigned val) { // As the 64-bit check, but additionally requires the N bit (bit 12) to be clear.
+ unsigned N = (val >> 12) & 1;
+ return N == 0 && isValidLogicalImmediate64(val);
+}
+
+#define DECODE_FAILED 0
+
+// returns DECODE_FAILED (zero) if the encoding is invalid
+uint64_t decodeLogicalImmediate64(unsigned val) { // val is an N:immr:imms encoding; returns the 64-bit immediate it denotes.
+ // Fun way to generate the immediates with mask ^ (mask << S)
+ static const uint64_t mask_lookup[] = {
+ 0xffffffffffffffff, // size = 64
+ 0x00000000ffffffff, // size = 32
+ 0x0000ffff0000ffff, // size = 16
+ 0x00ff00ff00ff00ff, // size = 8
+ 0x0f0f0f0f0f0f0f0f, // size = 4
+ 0x3333333333333333, // size = 2
+ };
+
+ unsigned N = (val >> 12) & 1;
+ int immr = (val >> 6) & 0x3f;
+ unsigned imms = val & 0x3f;
+
+ unsigned pattern = (N << 6) | (~imms & 0x3f); // 7-bit value N:~imms, so at most 0x7f
+
+ if (!(pattern & (pattern - 1))) return DECODE_FAILED; // same validity test as isValidLogicalImmediate64
+
+ int leading_zeroes = nonzeroCountLeadingZeros32(pattern); // 25..30 here: pattern is in 3..0x7f after the check above
+ unsigned imms_mask = 0x7fffffff >> leading_zeroes;
+ uint64_t mask = mask_lookup[leading_zeroes - 25]; // index 0..5, matching the six table entries
+ unsigned S = (imms + 1) & imms_mask;
+ return rotateRight64(mask ^ (mask << S), immr);
+}
+
+uint32_t decodeLogicalImmediate32(unsigned val) { // 32-bit variant: fails with DECODE_FAILED (zero) when the N bit is set, else returns the low 32 bits of the 64-bit decode.
+ unsigned N = (val >> 12) & 1;
+ if (N) return DECODE_FAILED;
+ return (uint32_t)decodeLogicalImmediate64(val);
+}
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
jit_int32_t w;
# undef ui
} instr_t;
-# define stack_framesize 160
+# define s26_p(d) ((d) >= -33554432 && (d) <= 33554431)
# define ii(i) *_jit->pc.ui++ = i
# define ldr(r0,r1) ldr_l(r0,r1)
# define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2)
# define A64_ORR 0x2a000000
# define A64_MOV 0x2a0003e0 /* AKA orr Rd,xzr,Rm */
# define A64_MVN 0x2a2003e0
+# define A64_CLS 0x5ac01400
+# define A64_CLZ 0x5ac01000
+# define A64_RBIT 0x5ac00000
# define A64_UXTW 0x2a0003e0 /* AKA MOV */
# define A64_EOR 0x4a000000
# define A64_ANDS 0x6a000000
# define MOV(Rd,Rm) ox_x(A64_MOV|XS,Rd,Rm)
# define MVN(Rd,Rm) ox_x(A64_MVN|XS,Rd,Rm)
# define NEG(Rd,Rm) ox_x(A64_NEG|XS,Rd,Rm)
+# define CLS(Rd,Rm) o_xx(A64_CLS|XS,Rd,Rm)
+# define CLZ(Rd,Rm) o_xx(A64_CLZ|XS,Rd,Rm)
+# define RBIT(Rd,Rm) o_xx(A64_RBIT|XS,Rd,Rm)
# define MOVN(Rd,Imm16) ox_h(A64_MOVN|XS,Rd,Imm16)
# define MOVN_16(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
# define MOVN_32(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define negr(r0,r1) NEG(r0,r1)
# define comr(r0,r1) MVN(r0,r1)
+/* clo/clz/cto/ctz: count leading/trailing one/zero bits of r1 into r0.
+ * clzr is a single CLZ instruction, so unlike the other three it has no
+ * _clzr() helper and therefore no prototype. */
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) CLZ(r0,r1)
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define andr(r0,r1,r2) AND(r0,r1,r2)
# define andi(r0,r1,i0) _andi(_jit,r0,r1,i0)
static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# define bmci(i0,r0,i1) bmxi(BCC_EQ,i0,r0,i1)
# define jmpr(r0) BR(r0)
# define jmpi(i0) _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
# define jmpi_p(i0) _jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
# define callr(r0) BLR(r0)
# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
# define calli_p(i0) _calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
# define prolog(i0) _prolog(_jit,i0)
#endif
#if CODE
+/* https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ */
+#include "aarch64-logical-immediates.c"
static jit_int32_t
logical_immediate(jit_word_t imm)
{
- /* There are 5334 possible immediate values, but to avoid the
- * need of either too complex code or large lookup tables,
- * only check for (simply) encodable common/small values */
- switch (imm) {
- case -16: return (0xf3b);
- case -15: return (0xf3c);
- case -13: return (0xf3d);
- case -9: return (0xf3e);
- case -8: return (0xf7c);
- case -7: return (0xf7d);
- case -5: return (0xf7e);
- case -4: return (0xfbd);
- case -3: return (0xfbe);
- case -2: return (0xffe);
- case 1: return (0x000);
- case 2: return (0xfc0);
- case 3: return (0x001);
- case 4: return (0xf80);
- case 6: return (0xfc1);
- case 7: return (0x002);
- case 8: return (0xf40);
- case 12: return (0xf81);
- case 14: return (0xfc2);
- case 15: return (0x003);
- case 16: return (0xf00);
- default: return (-1);
+ jit_int32_t result = encodeLogicalImmediate64(imm); /* N:immr:imms encoding, or ENCODE_FAILED */
+ if (result != ENCODE_FAILED) {
+ assert(isValidLogicalImmediate64(result));
+ return (result & 0xfff); /* NOTE(review): this keeps immr:imms but drops the N bit (bit 12) of the encoding; confirm the opcodes used by callers supply the right N for patterns shorter than 64 bits */
}
+ return (-1); /* not encodable; callers must fall back to another sequence */
}
static void
_o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
{
instr_t i;
- assert(Simm26 >= -33554432 && Simm26 <= 33554431);
+ assert(s26_p(Simm26));
assert(!(Op & ~0xfc000000));
i.w = Op;
i.imm26.b = Simm26;
CSEL(r0, r0, r1, CC_EQ);
}
+static void /* r0 = number of leading one bits in r1 */
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1); /* leading ones of r1 are leading zeros of ~r1 */
+ clzr(r0, r0);
+}
+
+static void /* r0 = number of trailing one bits in r1 */
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ RBIT(r0, r1); /* reverse the bit order so trailing bits become leading bits */
+ clor(r0, r0);
+}
+
+static void /* r0 = number of trailing zero bits in r1 */
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ RBIT(r0, r1); /* reverse the bit order so trailing bits become leading bits */
+ clzr(r0, r0);
+}
+
static void
_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
retry = _jit->pc.w;
LDAXR(r0, r1);
eqr(r0, r0, r2);
- jump0 = beqi(_jit->pc.w r0, 0); /* beqi done r0 0 */
+ jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */
STLXR(r3, r0, r1);
jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */
/* done: */
return (w);
}
-static void
+static jit_word_t /* now returns the code address at which the jump sequence starts */
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
jit_int32_t reg;
- w = (i0 - _jit->pc.w) >> 2;
- if (w >= -33554432 && w <= 33554431)
- B(w);
+ jit_word_t d, w;
+ w = _jit->pc.w; /* address of the first instruction emitted below */
+ d = (i0 - w) >> 2; /* byte distance to the target divided by 4 */
+ if (s26_p(d)) /* fits the signed 26-bit range checked by s26_p */
+ B(d);
else {
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i0);
jmpr(rn(reg));
jit_unget_reg(reg);
}
+ return (w); /* in the else branch this is the address of the movi sequence, not of a B */
}
static jit_word_t
return (w);
}
-static void
+static jit_word_t /* now returns the code address at which the call sequence starts */
_calli(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
jit_int32_t reg;
- w = (i0 - _jit->pc.w) >> 2;
- if (w >= -33554432 && w <= 33554431)
- BL(w);
+ jit_word_t d, w;
+ w = _jit->pc.w; /* address of the first instruction emitted below */
+ d = (i0 - w) >> 2; /* byte distance to the target divided by 4 */
+ if (s26_p(d)) /* fits the signed 26-bit range checked by s26_p */
+ BL(d);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
callr(rn(reg));
jit_unget_reg(reg);
}
+ return (w); /* in the else branch this is the address of the movi sequence, not of a BL */
}
static jit_word_t
return (w);
}
-/*
- * prolog and epilog not as "optimized" as one would like, but the
- * problem of overallocating stack space to save callee save registers
- * exists on all ports, and is still a todo to use a variable
- * stack_framesize
- * value, what would cause needing to patch some calls, most likely
- * the offset of jit_arg* of stack arguments.
- */
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, rreg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
_jitc->function->stack = ((_jitc->function->self.alen -
/* align stack at 16 bytes */
_jitc->function->self.aoff) + 15) & -16;
- STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
- MOV_XSP(FP_REGNO, SP_REGNO);
-#define SPILL(L, R, O) \
- do { \
- if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) { \
- if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \
- STPI(L, R, SP_REGNO, O); \
- else \
- STRI(L, SP_REGNO, O); \
- } \
- else if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \
- STRI(R, SP_REGNO, O + 1); \
- } while (0)
- SPILL(19, 20, 2);
- SPILL(21, 22, 4);
- SPILL(23, 24, 6);
- SPILL(25, 26, 8);
- SPILL(27, 28, 10);
-#undef SPILL
-#define SPILL(R, O) \
- do { \
- if (jit_regset_tstbit(&_jitc->function->regset, _V##R)) \
- stxi_d(O, SP_REGNO, R); \
- } while (0)
- SPILL( 8, 96);
- SPILL( 9, 104);
- SPILL(10, 112);
- SPILL(11, 120);
- SPILL(12, 128);
- SPILL(13, 136);
- SPILL(14, 144);
- SPILL(15, 152);
-#undef SPILL
- if (_jitc->function->stack)
+
+ if (!_jitc->function->need_frame) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ jit_check_frame();
+ break;
+ }
+ }
+
+ if (_jitc->function->need_frame) {
+ STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(jit_framesize() >> 3));
+ MOV_XSP(FP_REGNO, SP_REGNO);
+ }
+ /* callee save registers */
+ for (reg = 0, offs = 2; reg < jit_size(iregs);) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
+ break;
+ }
+ if (rreg < jit_size(iregs)) {
+ STPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);
+ offs += 2;
+ reg = rreg + 1;
+ }
+ else {
+ STRI(rn(iregs[reg]), SP_REGNO, offs);
+ ++offs;
+ /* No pair found */
+ break;
+ }
+ }
+ else
+ ++reg;
+ }
+ for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ stxi_d(offs, SP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->stack)
subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
reg = jit_get_reg(jit_class_gpr);
jit_unget_reg(reg);
}
+#if !__APPLE__
if (_jitc->function->self.call & jit_call_varargs) {
/* Save gp registers in the save area, if any is a vararg */
for (reg = 8 - _jitc->function->vagp / -8;
stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
}
+#endif
}
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, rreg, offs; /* reg/rreg index iregs[] or fregs[]; offs is the running save-area offset */
if (_jitc->function->assume_frame)
return;
if (_jitc->function->stack)
MOV_XSP(SP_REGNO, FP_REGNO);
-#define LOAD(L, R, O) \
- do { \
- if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) { \
- if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \
- LDPI(L, R, SP_REGNO, O); \
- else \
- LDRI(L, SP_REGNO, O); \
- } \
- else if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \
- LDRI(R, SP_REGNO, O + 1); \
- } while (0)
- LOAD(19, 20, 2);
- LOAD(21, 22, 4);
- LOAD(23, 24, 6);
- LOAD(25, 26, 8);
- LOAD(27, 28, 10);
-#undef LOAD
-#define LOAD(R, O) \
- do { \
- if (jit_regset_tstbit(&_jitc->function->regset, _V##R)) \
- ldxi_d(R, SP_REGNO, O); \
- } while (0)
- LOAD( 8, 96);
- LOAD( 9, 104);
- LOAD(10, 112);
- LOAD(11, 120);
- LOAD(12, 128);
- LOAD(13, 136);
- LOAD(14, 144);
- LOAD(15, 152);
-#undef LOAD
- LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
+ /* callee save registers */
+ for (reg = 0, offs = 2; reg < jit_size(iregs);) { /* offs starts at 2; slots 0-1 are presumably the fp/lr pair pushed by the prolog */
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) { /* look for the next used register to pair with */
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
+ break;
+ }
+ if (rreg < jit_size(iregs)) {
+ LDPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs); /* reload both registers with one pair load */
+ offs += 2;
+ reg = rreg + 1;
+ }
+ else {
+ LDRI(rn(iregs[reg]), SP_REGNO, offs);
+ ++offs;
+ /* No pair found */
+ break;
+ }
+ }
+ else
+ ++reg;
+ }
+ for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) { /* offs <<= 3 multiplies the slot count by 8 for ldxi_d */
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ ldxi_d(rn(fregs[reg]), SP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame) /* fp/lr are only restored when a frame was requested */
+ LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, jit_framesize() >> 3);
RET();
}
static void
_vastart(jit_state_t *_jit, jit_int32_t r0)
{
+#if !__APPLE__
jit_int32_t reg;
assert(_jitc->function->self.call & jit_call_varargs);
reg = jit_get_reg(jit_class_gpr);
/* Initialize stack pointer to the first stack argument. */
- addi(rn(reg), FP_REGNO, _jitc->function->self.size);
+ addi(rn(reg), FP_REGNO, jit_selfsize());
stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
/* Initialize gp top pointer to the first stack argument. */
stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
jit_unget_reg(reg);
+#else
+ assert(_jitc->function->self.call & jit_call_varargs);
+ addi(r0, FP_REGNO, jit_selfsize());
+#endif
}
static void
_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+#if !__APPLE__
jit_word_t ge_code;
jit_word_t lt_code;
jit_int32_t rg0, rg1;
jit_unget_reg(rg1);
/* Jump over overflow code. */
- lt_code = jmpi_p(_jit->pc.w);
+ lt_code = jmpi(_jit->pc.w);
/* Where to land if argument is in overflow area. */
patch_at(ge_code, _jit->pc.w);
patch_at(lt_code, _jit->pc.w);
jit_unget_reg(rg0);
+#else
+ assert(_jitc->function->self.call & jit_call_varargs);
+ ldr(r0, r1);
+ addi(r1, r1, sizeof(jit_word_t));
+#endif
}
static void
ffc = i.w & 0xffc00000;
if (fc == A64_B || fc == A64_BL) {
d = (label - instr) >> 2;
- assert(d >= -33554432 && d <= 33554431);
+ assert(s26_p(d));
i.imm26.b = d;
u.i[0] = i.w;
}
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
static void
_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+#if !__APPLE__
jit_word_t ge_code;
jit_word_t lt_code;
jit_int32_t rg0, rg1;
jit_unget_reg(rg1);
/* Jump over overflow code. */
- lt_code = jmpi_p(_jit->pc.w);
+ lt_code = jmpi(_jit->pc.w);
/* Where to land if argument is in overflow area. */
patch_at(ge_code, _jit->pc.w);
patch_at(lt_code, _jit->pc.w);
jit_unget_reg(rg0);
+#else
+ assert(_jitc->function->self.call & jit_call_varargs);
+ ldr_d(r0, r1);
+ addi(r1, r1, sizeof(jit_float64_t));
+#endif
}
#endif
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 120
+# if PACKED_STACK
+#define JIT_INSTR_MAX 96
0, /* data */
0, /* live */
- 4, /* align */
+ 12, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
- 120, /* prolog */
+ 96, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
- 44, /* va_start */
- 64, /* va_arg */
- 72, /* va_arg_d */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
+ 4, /* va_start */
+ 8, /* va_arg */
+ 12, /* va_arg_d */
0, /* va_end */
4, /* addr */
20, /* addi */
16, /* movi */
8, /* movnr */
8, /* movzr */
+ 28, /* casr */
+ 36, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
4, /* extr_i */
4, /* extr_ui */
+ 8, /* bswapr_us */
+ 8, /* bswapr_ui */
+ 4, /* bswapr_ul */
8, /* htonr_us */
8, /* htonr_ui */
4, /* htonr_ul */
4, /* ldr_c */
- 12, /* ldi_c */
+ 16, /* ldi_c */
4, /* ldr_uc */
- 12, /* ldi_uc */
+ 16, /* ldi_uc */
4, /* ldr_s */
- 12, /* ldi_s */
+ 16, /* ldi_s */
4, /* ldr_us */
- 12, /* ldi_us */
+ 16, /* ldi_us */
4, /* ldr_i */
- 12, /* ldi_i */
+ 16, /* ldi_i */
4, /* ldr_ui */
- 12, /* ldi_ui */
+ 16, /* ldi_ui */
4, /* ldr_l */
- 12, /* ldi_l */
+ 16, /* ldi_l */
8, /* ldxr_c */
20, /* ldxi_c */
4, /* ldxr_uc */
4, /* ldxr_l */
20, /* ldxi_l */
4, /* str_c */
- 12, /* sti_c */
+ 16, /* sti_c */
4, /* str_s */
- 12, /* sti_s */
+ 16, /* sti_s */
4, /* str_i */
- 12, /* sti_i */
+ 16, /* sti_i */
4, /* str_l */
- 12, /* sti_l */
+ 16, /* sti_l */
4, /* stxr_c */
20, /* stxi_c */
4, /* stxr_s */
8, /* bxsubr_u */
8, /* bxsubi_u */
4, /* jmpr */
- 20, /* jmpi */
+ 4, /* jmpi */
4, /* callr */
- 20, /* calli */
+ 16, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
4, /* movr_f */
8, /* movi_f */
8, /* ldr_f */
- 16, /* ldi_f */
+ 20, /* ldi_f */
8, /* ldxr_f */
24, /* ldxi_f */
8, /* str_f */
- 16, /* sti_f */
+ 20, /* sti_f */
8, /* stxr_f */
24, /* stxi_f */
8, /* bltr_f */
4, /* movr_d */
12, /* movi_d */
8, /* ldr_d */
- 16, /* ldi_d */
+ 20, /* ldi_d */
8, /* ldxr_d */
24, /* ldxi_d */
8, /* str_d */
- 16, /* sti_d */
+ 20, /* sti_d */
8, /* stxr_d */
24, /* stxi_d */
8, /* bltr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
+ 8, /* clo */
+ 4, /* clz */
+ 12, /* cto */
+ 8, /* ctz */
+
+# else /* PACKED_STACK */
+#define JIT_INSTR_MAX 120
+ 0, /* data */
+ 0, /* live */
+ 12, /* align */
+ 0, /* save */
+ 0, /* load */
+ 4, /* skip */
+ 0, /* #name */
+ 0, /* #note */
+ 0, /* label */
+ 120, /* prolog */
+ 0, /* ellipsis */
+ 0, /* va_push */
+ 0, /* allocai */
+ 0, /* allocar */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
+ 0, /* getarg_c */
+ 0, /* getarg_uc */
+ 0, /* getarg_s */
+ 0, /* getarg_us */
+ 0, /* getarg_i */
+ 0, /* getarg_ui */
+ 0, /* getarg_l */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
+ 4, /* va_start */
+ 8, /* va_arg */
+ 12, /* va_arg_d */
+ 0, /* va_end */
+ 4, /* addr */
+ 20, /* addi */
+ 4, /* addcr */
+ 12, /* addci */
+ 4, /* addxr */
+ 8, /* addxi */
+ 4, /* subr */
+ 20, /* subi */
+ 4, /* subcr */
+ 12, /* subci */
+ 4, /* subxr */
+ 8, /* subxi */
+ 24, /* rsbi */
+ 4, /* mulr */
+ 20, /* muli */
+ 12, /* qmulr */
+ 20, /* qmuli */
+ 12, /* qmulr_u */
+ 20, /* qmuli_u */
+ 4, /* divr */
+ 20, /* divi */
+ 4, /* divr_u */
+ 12, /* divi_u */
+ 20, /* qdivr */
+ 16, /* qdivi */
+ 20, /* qdivr_u */
+ 16, /* qdivi_u */
+ 12, /* remr */
+ 28, /* remi */
+ 12, /* remr_u */
+ 20, /* remi_u */
+ 4, /* andr */
+ 20, /* andi */
+ 4, /* orr */
+ 20, /* ori */
+ 4, /* xorr */
+ 20, /* xori */
+ 4, /* lshr */
+ 4, /* lshi */
+ 4, /* rshr */
+ 4, /* rshi */
+ 4, /* rshr_u */
+ 4, /* rshi_u */
+ 4, /* negr */
+ 4, /* comr */
+ 8, /* ltr */
+ 8, /* lti */
+ 8, /* ltr_u */
+ 8, /* lti_u */
+ 8, /* ler */
+ 8, /* lei */
+ 8, /* ler_u */
+ 8, /* lei_u */
+ 8, /* eqr */
+ 8, /* eqi */
+ 8, /* ger */
+ 8, /* gei */
+ 8, /* ger_u */
+ 8, /* gei_u */
+ 8, /* gtr */
+ 8, /* gti */
+ 8, /* gtr_u */
+ 8, /* gti_u */
+ 8, /* ner */
+ 8, /* nei */
+ 4, /* movr */
+ 16, /* movi */
+ 8, /* movnr */
+ 8, /* movzr */
+ 28, /* casr */
+ 36, /* casi */
+ 4, /* extr_c */
+ 4, /* extr_uc */
+ 4, /* extr_s */
+ 4, /* extr_us */
+ 4, /* extr_i */
+ 4, /* extr_ui */
8, /* bswapr_us */
8, /* bswapr_ui */
4, /* bswapr_ul */
- 28, /* casr */
- 36, /* casi */
+ 8, /* htonr_us */
+ 8, /* htonr_ui */
+ 4, /* htonr_ul */
+ 4, /* ldr_c */
+ 16, /* ldi_c */
+ 4, /* ldr_uc */
+ 16, /* ldi_uc */
+ 4, /* ldr_s */
+ 16, /* ldi_s */
+ 4, /* ldr_us */
+ 16, /* ldi_us */
+ 4, /* ldr_i */
+ 16, /* ldi_i */
+ 4, /* ldr_ui */
+ 16, /* ldi_ui */
+ 4, /* ldr_l */
+ 16, /* ldi_l */
+ 8, /* ldxr_c */
+ 20, /* ldxi_c */
+ 4, /* ldxr_uc */
+ 20, /* ldxi_uc */
+ 4, /* ldxr_s */
+ 16, /* ldxi_s */
+ 4, /* ldxr_us */
+ 16, /* ldxi_us */
+ 4, /* ldxr_i */
+ 20, /* ldxi_i */
+ 4, /* ldxr_ui */
+ 16, /* ldxi_ui */
+ 4, /* ldxr_l */
+ 20, /* ldxi_l */
+ 4, /* str_c */
+ 16, /* sti_c */
+ 4, /* str_s */
+ 16, /* sti_s */
+ 4, /* str_i */
+ 16, /* sti_i */
+ 4, /* str_l */
+ 16, /* sti_l */
+ 4, /* stxr_c */
+ 20, /* stxi_c */
+ 4, /* stxr_s */
+ 20, /* stxi_s */
+ 4, /* stxr_i */
+ 20, /* stxi_i */
+ 4, /* stxr_l */
+ 20, /* stxi_l */
+ 8, /* bltr */
+ 8, /* blti */
+ 8, /* bltr_u */
+ 8, /* blti_u */
+ 8, /* bler */
+ 8, /* blei */
+ 8, /* bler_u */
+ 8, /* blei_u */
+ 8, /* beqr */
+ 24, /* beqi */
+ 8, /* bger */
+ 8, /* bgei */
+ 8, /* bger_u */
+ 8, /* bgei_u */
+ 8, /* bgtr */
+ 8, /* bgti */
+ 8, /* bgtr_u */
+ 8, /* bgti_u */
+ 8, /* bner */
+ 24, /* bnei */
+ 8, /* bmsr */
+ 8, /* bmsi */
+ 8, /* bmcr */
+ 8, /* bmci */
+ 8, /* boaddr */
+ 8, /* boaddi */
+ 8, /* boaddr_u */
+ 8, /* boaddi_u */
+ 8, /* bxaddr */
+ 8, /* bxaddi */
+ 8, /* bxaddr_u */
+ 8, /* bxaddi_u */
+ 8, /* bosubr */
+ 8, /* bosubi */
+ 8, /* bosubr_u */
+ 8, /* bosubi_u */
+ 8, /* bxsubr */
+ 8, /* bxsubi */
+ 8, /* bxsubr_u */
+ 8, /* bxsubi_u */
+ 4, /* jmpr */
+ 4, /* jmpi */
+ 4, /* callr */
+ 16, /* calli */
+ 0, /* prepare */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
+ 0, /* finishr */
+ 0, /* finishi */
+ 0, /* ret */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
+ 0, /* retval_c */
+ 0, /* retval_uc */
+ 0, /* retval_s */
+ 0, /* retval_us */
+ 0, /* retval_i */
+ 0, /* retval_ui */
+ 0, /* retval_l */
+ 96, /* epilog */
+ 0, /* arg_f */
+ 0, /* getarg_f */
+ 0, /* putargr_f */
+ 0, /* putargi_f */
+ 4, /* addr_f */
+ 12, /* addi_f */
+ 4, /* subr_f */
+ 12, /* subi_f */
+ 12, /* rsbi_f */
+ 4, /* mulr_f */
+ 12, /* muli_f */
+ 4, /* divr_f */
+ 12, /* divi_f */
+ 4, /* negr_f */
+ 4, /* absr_f */
+ 4, /* sqrtr_f */
+ 8, /* ltr_f */
+ 16, /* lti_f */
+ 8, /* ler_f */
+ 16, /* lei_f */
+ 8, /* eqr_f */
+ 16, /* eqi_f */
+ 8, /* ger_f */
+ 16, /* gei_f */
+ 8, /* gtr_f */
+ 16, /* gti_f */
+ 8, /* ner_f */
+ 16, /* nei_f */
+ 8, /* unltr_f */
+ 16, /* unlti_f */
+ 8, /* unler_f */
+ 16, /* unlei_f */
+ 16, /* uneqr_f */
+ 24, /* uneqi_f */
+ 8, /* unger_f */
+ 16, /* ungei_f */
+ 8, /* ungtr_f */
+ 16, /* ungti_f */
+ 16, /* ltgtr_f */
+ 24, /* ltgti_f */
+ 8, /* ordr_f */
+ 16, /* ordi_f */
+ 8, /* unordr_f */
+ 16, /* unordi_f */
+ 8, /* truncr_f_i */
+ 4, /* truncr_f_l */
+ 4, /* extr_f */
+ 4, /* extr_d_f */
+ 4, /* movr_f */
+ 8, /* movi_f */
+ 8, /* ldr_f */
+ 20, /* ldi_f */
+ 8, /* ldxr_f */
+ 24, /* ldxi_f */
+ 8, /* str_f */
+ 20, /* sti_f */
+ 8, /* stxr_f */
+ 24, /* stxi_f */
+ 8, /* bltr_f */
+ 16, /* blti_f */
+ 8, /* bler_f */
+ 16, /* blei_f */
+ 8, /* beqr_f */
+ 16, /* beqi_f */
+ 8, /* bger_f */
+ 16, /* bgei_f */
+ 8, /* bgtr_f */
+ 16, /* bgti_f */
+ 8, /* bner_f */
+ 16, /* bnei_f */
+ 8, /* bunltr_f */
+ 16, /* bunlti_f */
+ 8, /* bunler_f */
+ 16, /* bunlei_f */
+ 16, /* buneqr_f */
+ 24, /* buneqi_f */
+ 8, /* bunger_f */
+ 16, /* bungei_f */
+ 8, /* bungtr_f */
+ 16, /* bungti_f */
+ 16, /* bltgtr_f */
+ 24, /* bltgti_f */
+ 8, /* bordr_f */
+ 16, /* bordi_f */
+ 8, /* bunordr_f */
+ 16, /* bunordi_f */
+ 0, /* pushargr_f */
+ 0, /* pushargi_f */
+ 0, /* retr_f */
+ 0, /* reti_f */
+ 0, /* retval_f */
+ 0, /* arg_d */
+ 0, /* getarg_d */
+ 0, /* putargr_d */
+ 0, /* putargi_d */
+ 4, /* addr_d */
+ 12, /* addi_d */
+ 4, /* subr_d */
+ 12, /* subi_d */
+ 12, /* rsbi_d */
+ 4, /* mulr_d */
+ 12, /* muli_d */
+ 4, /* divr_d */
+ 12, /* divi_d */
+ 4, /* negr_d */
+ 4, /* absr_d */
+ 4, /* sqrtr_d */
+ 8, /* ltr_d */
+ 16, /* lti_d */
+ 8, /* ler_d */
+ 16, /* lei_d */
+ 8, /* eqr_d */
+ 16, /* eqi_d */
+ 8, /* ger_d */
+ 16, /* gei_d */
+ 8, /* gtr_d */
+ 16, /* gti_d */
+ 8, /* ner_d */
+ 16, /* nei_d */
+ 8, /* unltr_d */
+ 16, /* unlti_d */
+ 8, /* unler_d */
+ 16, /* unlei_d */
+ 16, /* uneqr_d */
+ 24, /* uneqi_d */
+ 8, /* unger_d */
+ 16, /* ungei_d */
+ 8, /* ungtr_d */
+ 16, /* ungti_d */
+ 16, /* ltgtr_d */
+ 24, /* ltgti_d */
+ 8, /* ordr_d */
+ 16, /* ordi_d */
+ 8, /* unordr_d */
+ 16, /* unordi_d */
+ 8, /* truncr_d_i */
+ 4, /* truncr_d_l */
+ 4, /* extr_d */
+ 4, /* extr_f_d */
+ 4, /* movr_d */
+ 12, /* movi_d */
+ 8, /* ldr_d */
+ 20, /* ldi_d */
+ 8, /* ldxr_d */
+ 24, /* ldxi_d */
+ 8, /* str_d */
+ 20, /* sti_d */
+ 8, /* stxr_d */
+ 24, /* stxi_d */
+ 8, /* bltr_d */
+ 16, /* blti_d */
+ 8, /* bler_d */
+ 16, /* blei_d */
+ 8, /* beqr_d */
+ 20, /* beqi_d */
+ 8, /* bger_d */
+ 16, /* bgei_d */
+ 8, /* bgtr_d */
+ 16, /* bgti_d */
+ 8, /* bner_d */
+ 16, /* bnei_d */
+ 8, /* bunltr_d */
+ 16, /* bunlti_d */
+ 8, /* bunler_d */
+ 16, /* bunlei_d */
+ 16, /* buneqr_d */
+ 24, /* buneqi_d */
+ 8, /* bunger_d */
+ 16, /* bungei_d */
+ 8, /* bungtr_d */
+ 16, /* bungti_d */
+ 16, /* bltgtr_d */
+ 24, /* bltgti_d */
+ 8, /* bordr_d */
+ 16, /* bordi_d */
+ 8, /* bunordr_d */
+ 16, /* bunordi_d */
+ 0, /* pushargr_d */
+ 0, /* pushargi_d */
+ 0, /* retr_d */
+ 0, /* reti_d */
+ 0, /* retval_d */
+ 0, /* movr_w_f */
+ 0, /* movr_ww_d */
+ 0, /* movr_w_d */
+ 0, /* movr_f_w */
+ 0, /* movi_f_w */
+ 0, /* movr_d_ww */
+ 0, /* movi_d_ww */
+ 0, /* movr_d_w */
+ 0, /* movi_d_w */
+ 8, /* clo */
+ 4, /* clz */
+ 12, /* cto */
+ 8, /* ctz */
+# endif
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* Paulo Cesar Pereira de Andrade
*/
+/* callee save
+ * align16(lr+fp+x19+x2[0-8]+v8+v9+v1[0-5]) */
+#define stack_framesize 160
+
#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8)
#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8)
+#if __APPLE__
+typedef jit_pointer_t jit_va_list_t;
+#else
typedef struct jit_qreg {
jit_float64_t l;
jit_float64_t h;
jit_qreg_t q6;
jit_qreg_t q7;
} jit_va_list_t;
+#endif
/*
* Prototypes
*/
+#define compute_framesize() _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
#define patch(instr, node) _patch(_jit, instr, node)
static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
*/
jit_register_t _rvs[] = {
{ rc(gpr) | 0x08, "x8" },
+#if __APPLE__
+ { 0x12, "x18" },
+#else
{ rc(gpr) | 0x12, "x18" },
+#endif
{ rc(gpr) | 0x11, "x17" },
{ rc(gpr) | 0x10, "x16" },
{ rc(gpr) | 0x09, "x9" },
{ _NOREG, "<none>" },
};
+static jit_int32_t iregs[] = {
+ _R19, _R20, _R21, _R22, _R23, _R24, _R25, _R26, _R27, _R28
+};
+
+static jit_int32_t fregs[] = {
+ _V8, _V9, _V10, _V11, _V12, _V13, _V14, _V15
+};
+
/*
* Implementation
*/
_jit_allocai(jit_state_t *_jit, jit_int32_t length)
{
assert(_jitc->function);
+ jit_check_frame();
switch (length) {
case 0: case 1: break;
case 2: _jitc->function->self.aoff &= -2; break;
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u);
+ jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
return (jit_arg_f_reg_p(u->u.w));
_jit_ellipsis(jit_state_t *_jit)
{
jit_inc_synth(ellipsis);
+ jit_check_frame();
if (_jitc->prepare) {
jit_link_prepare();
assert(!(_jitc->function->call.call & jit_call_varargs));
assert(!(_jitc->function->self.call & jit_call_varargs));
_jitc->function->self.call |= jit_call_varargs;
+#if !__APPLE__
/* Allocate va_list like object in the stack,
* with enough space to save all argument
* registers, and use fixed offsets for them. */
_jitc->function->vafp = (8 - _jitc->function->self.argf) * -16;
else
_jitc->function->vafp = 0;
+#endif
}
jit_dec_synth();
}
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
+#if PACKED_STACK || STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+#if PACKED_STACK
+ _jitc->function->self.size +=
+ _jitc->function->self.size & ((1 << (code - jit_code_arg_c)) - 1);
+#endif
offset = _jitc->function->self.size;
+#if PACKED_STACK
+ _jitc->function->self.size += 1 << (code - jit_code_arg_c);
+#else
_jitc->function->self.size += sizeof(jit_word_t);
+#endif
+ jit_check_frame();
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
if (jit_arg_f_reg_p(_jitc->function->self.argf))
offset = _jitc->function->self.argf++;
else {
+#if PACKED_STACK
+ _jitc->function->self.size +=
+ _jitc->function->self.size & (sizeof(jit_float32_t) - 1);
+#endif
offset = _jitc->function->self.size;
+#if PACKED_STACK
+ _jitc->function->self.size += sizeof(jit_float32_t);
+#else
_jitc->function->self.size += sizeof(jit_word_t);
+#endif
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_f, offset,
++_jitc->function->self.argn);
if (jit_arg_f_reg_p(_jitc->function->self.argf))
offset = _jitc->function->self.argf++;
else {
+#if PACKED_STACK
+ _jitc->function->self.size +=
+ _jitc->function->self.size & (sizeof(jit_float64_t) - 1);
+#endif
offset = _jitc->function->self.size;
- _jitc->function->self.size += sizeof(jit_word_t);
+ _jitc->function->self.size += sizeof(jit_float64_t);
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_d, offset,
++_jitc->function->self.argn);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+ jit_movr(u, JIT_RA0 - v->u.w);
+#else
jit_extr_c(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_c(u, JIT_FP, v->u.w);
+#endif
+ }
+ else {
+ jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+ jit_movr(u, JIT_RA0 - v->u.w);
+#else
jit_extr_uc(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_uc(u, JIT_FP, v->u.w);
+#endif
+ }
+ else {
+ jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+ jit_movr(u, JIT_RA0 - v->u.w);
+#else
jit_extr_s(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_s(u, JIT_FP, v->u.w);
+#endif
+ }
+ else {
+ jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+ jit_movr(u, JIT_RA0 - v->u.w);
+#else
jit_extr_us(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_us(u, JIT_FP, v->u.w);
+#endif
+ }
+ else {
+ jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK || __WORDSIZE == 32
+ jit_movr(u, JIT_RA0 - v->u.w);
+#else
jit_extr_i(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_i(u, JIT_FP, v->u.w);
+#endif
+ }
+ else {
+ jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+#if __WORDSIZE == 64
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+ jit_movr(u, JIT_RA0 - v->u.w);
+#else
jit_extr_ui(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_ui(u, JIT_FP, v->u.w);
+#endif
+ }
+ else {
+ jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_l(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+#endif
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
- if (jit_arg_reg_p(v->u.w))
- jit_movr(JIT_RA0 - v->u.w, u);
- else
- jit_stxi(v->u.w, JIT_FP, u);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
+ if (jit_arg_reg_p(v->u.w)) {
+ jit_int32_t regno = JIT_RA0 - v->u.w;
+#if PACKED_STACK
+ switch (code) {
+ case jit_code_putargr_c: jit_extr_c(regno, u); break;
+ case jit_code_putargr_uc: jit_extr_uc(regno, u); break;
+ case jit_code_putargr_s: jit_extr_s(regno, u); break;
+ case jit_code_putargr_us: jit_extr_us(regno, u); break;
+# if __WORDSIZE == 32
+ case jit_code_putargr_i: jit_movr(regno, u); break;
+# else
+ case jit_code_putargr_i: jit_extr_i(regno, u); break;
+ case jit_code_putargr_ui: jit_extr_ui(regno, u); break;
+ case jit_code_putargr_l: jit_movr(regno, u); break;
+# endif
+ default: abort(); break;
+ }
+#else
+ jit_movr(regno, u);
+#endif
+ }
+ else {
+ jit_node_t *node;
+#if PACKED_STACK
+ switch (code) {
+ case jit_code_putargr_c: case jit_code_putargr_uc:
+ node = jit_stxi_c(v->u.w, JIT_FP, u); break;
+ case jit_code_putargr_s: case jit_code_putargr_us:
+ node = jit_stxi_s(v->u.w, JIT_FP, u); break;
+# if __WORDSIZE == 32
+ case jit_code_putargr_i:
+ node = jit_stxi(v->u.w, JIT_FP, u); break;
+# else
+ case jit_code_putargr_i: case jit_code_putargr_ui:
+ node = jit_stxi_i(v->u.w, JIT_FP, u); break;
+ case jit_code_putargr_l:
+ node = jit_stxi(v->u.w, JIT_FP, u); break;
+# endif
+ default: abort(); break;
+ }
+#else
+ node = jit_stxi(v->u.w, JIT_FP, u);
+#endif
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
+#if PACKED_STACK
+ switch (code) {
+ case jit_code_putargi_c: u = (jit_int8_t)u; break;
+ case jit_code_putargi_uc: u = (jit_uint8_t)u; break;
+ case jit_code_putargi_s: u = (jit_int16_t)u; break;
+ case jit_code_putargi_us: u = (jit_uint16_t)u; break;
+# if __WORDSIZE == 32
+ case jit_code_putargi_i: break;
+# else
+ case jit_code_putargi_i: u = (jit_int32_t)u; break;
+ case jit_code_putargi_ui: u = (jit_uint32_t)u; break;
+ case jit_code_putargi_l: break;
+# endif
+ default: abort(); break;
+ }
+#endif
if (jit_arg_reg_p(v->u.w))
jit_movi(JIT_RA0 - v->u.w, u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(v->u.w, JIT_FP, regno);
+#if PACKED_STACK
+ switch (code) {
+ case jit_code_putargi_c: case jit_code_putargi_uc:
+ node = jit_stxi_c(v->u.w, JIT_FP, regno); break;
+ case jit_code_putargi_s: case jit_code_putargi_us:
+ node = jit_stxi_s(v->u.w, JIT_FP, regno); break;
+# if __WORDSIZE == 32
+ case jit_code_putargi_i:
+ node = jit_stxi(v->u.w, JIT_FP, regno); break;
+# else
+ case jit_code_putargi_i: case jit_code_putargi_ui:
+ node = jit_stxi_i(v->u.w, JIT_FP, regno); break;
+ case jit_code_putargi_l:
+ node = jit_stxi(v->u.w, JIT_FP, regno); break;
+# endif
+ default: abort(); break;
+ }
+#else
+ node = jit_stxi(v->u.w, JIT_FP, regno);
+#endif
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_inc_synth_wp(getarg_f, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr_f(u, JIT_FA0 - v->u.w);
- else
- jit_ldxi_f(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_inc_synth_wp(putargr_f, u, v);
if (jit_arg_f_reg_p(v->u.w))
jit_movr_f(JIT_FA0 - v->u.w, u);
- else
- jit_stxi_f(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
if (jit_arg_f_reg_p(v->u.w))
jit_movi_f(JIT_FA0 - v->u.w, u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(v->u.w, JIT_FP, regno);
+ node = jit_stxi_f(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_inc_synth_wp(getarg_d, u, v);
if (jit_arg_f_reg_p(v->u.w))
jit_movr_d(u, JIT_FA0 - v->u.w);
- else
- jit_ldxi_d(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_inc_synth_wp(putargr_d, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr_d(JIT_FA0 - v->u.w, u);
- else
- jit_stxi_d(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
if (jit_arg_reg_p(v->u.w))
jit_movi_d(JIT_FA0 - v->u.w, u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(v->u.w, JIT_FP, regno);
+ node = jit_stxi_d(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
- jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+ jit_int32_t regno = JIT_RA0 - _jitc->function->call.argi;
+#if PACKED_STACK
+ switch (code) {
+ case jit_code_pushargr_c: jit_extr_c(regno, u); break;
+ case jit_code_pushargr_uc: jit_extr_uc(regno, u); break;
+ case jit_code_pushargr_s: jit_extr_s(regno, u); break;
+ case jit_code_pushargr_us: jit_extr_us(regno, u); break;
+# if __WORDSIZE == 32
+ case jit_code_pushargr_i: jit_movr(regno, u); break;
+# else
+ case jit_code_pushargr_i: jit_extr_i(regno, u); break;
+ case jit_code_pushargr_ui: jit_extr_ui(regno, u); break;
+ case jit_code_pushargr_l: jit_movr(regno, u); break;
+# endif
+ default: abort(); break;
+ }
+#else
+ jit_movr(regno, u);
+#endif
+#if __APPLE__
+ if (_jitc->function->call.call & jit_call_varargs) {
+ assert(code == jit_code_pushargr);
+ jit_stxi(_jitc->function->call.size, JIT_SP, u);
+ _jitc->function->call.size += sizeof(jit_word_t);
+ }
+#endif
++_jitc->function->call.argi;
}
else {
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size &
+ ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1);
+ switch (code) {
+ case jit_code_pushargr_c: case jit_code_pushargr_uc:
+ jit_stxi_c(_jitc->function->call.size, JIT_SP, u);
+ break;
+ case jit_code_pushargr_s: case jit_code_pushargr_us:
+ jit_stxi_s(_jitc->function->call.size, JIT_SP, u);
+ break;
+# if __WORDSIZE == 32
+ case jit_code_pushargr_i:
+ jit_stxi(_jitc->function->call.size, JIT_SP, u);
+ break;
+# else
+ case jit_code_pushargr_i: case jit_code_pushargr_ui:
+ jit_stxi_i(_jitc->function->call.size, JIT_SP, u);
+ break;
+ case jit_code_pushargr_l:
+ jit_stxi(_jitc->function->call.size, JIT_SP, u);
+ break;
+# endif
+ default:
+ abort();
+ break;
+ }
+ _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2);
+#else
jit_stxi(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+#endif
+ jit_check_frame();
}
jit_dec_synth();
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
+#if PACKED_STACK
+ switch (code) {
+ case jit_code_pushargi_c: u = (jit_int8_t)u; break;
+ case jit_code_pushargi_uc: u = (jit_uint8_t)u; break;
+ case jit_code_pushargi_s: u = (jit_int16_t)u; break;
+ case jit_code_pushargi_us: u = (jit_uint16_t)u; break;
+# if __WORDSIZE == 32
+ case jit_code_pushargi_i: break;
+# else
+ case jit_code_pushargi_i: u = (jit_int32_t)u; break;
+ case jit_code_pushargi_ui: u = (jit_uint32_t)u; break;
+ case jit_code_pushargi_l: break;
+# endif
+ default: abort(); break;
+ }
+#endif
if (jit_arg_reg_p(_jitc->function->call.argi)) {
- jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+ regno = JIT_RA0 - _jitc->function->call.argi;
+ jit_movi(regno, u);
+#if __APPLE__
+ if (_jitc->function->call.call & jit_call_varargs) {
+ assert(code == jit_code_pushargi);
+ jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+ _jitc->function->call.size += sizeof(jit_word_t);
+ }
+#endif
++_jitc->function->call.argi;
}
else {
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size &
+ ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1);
+ switch (code) {
+ case jit_code_pushargi_c: case jit_code_pushargi_uc:
+ jit_stxi_c(_jitc->function->call.size, JIT_SP, regno);
+ break;
+ case jit_code_pushargi_s: case jit_code_pushargi_us:
+ jit_stxi_s(_jitc->function->call.size, JIT_SP, regno);
+ break;
+# if __WORDSIZE == 32
+ case jit_code_pushargi_i:
+ jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+ break;
+# else
+ case jit_code_pushargi_i: case jit_code_pushargi_ui:
+ jit_stxi_i(_jitc->function->call.size, JIT_SP, regno);
+ break;
+ case jit_code_pushargi_l:
+ jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+ break;
+# endif
+ default:
+ abort();
+ break;
+ }
+ _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2);
+#else
jit_stxi(_jitc->function->call.size, JIT_SP, regno);
- jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+#endif
+ jit_unget_reg(regno);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_link_prepare();
if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+ if (_jitc->function->call.call & jit_call_varargs) {
+ assert(sizeof(jit_float32_t) == sizeof(jit_word_t));
+ jit_stxi_f(_jitc->function->call.size, JIT_SP,
+ JIT_FA0 - _jitc->function->call.argf);
+ _jitc->function->call.size += sizeof(jit_word_t);
+ }
+#endif
++_jitc->function->call.argf;
}
else {
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size & (sizeof(jit_float32_t) - 1);
+ jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+ _jitc->function->call.size += sizeof(jit_float32_t);
+#else
jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+#endif
+ jit_check_frame();
}
jit_dec_synth();
}
jit_link_prepare();
if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+ if (_jitc->function->call.call & jit_call_varargs) {
+ assert(sizeof(jit_float32_t) == sizeof(jit_word_t));
+ jit_stxi_f(_jitc->function->call.size, JIT_SP,
+ JIT_FA0 - _jitc->function->call.argf);
+ _jitc->function->call.size += sizeof(jit_word_t);
+ }
+#endif
++_jitc->function->call.argf;
}
else {
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size & (sizeof(jit_float32_t) - 1);
+ jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
+ _jitc->function->call.size += sizeof(jit_float32_t);
+#else
jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
- jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+#endif
+ jit_unget_reg(regno);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_link_prepare();
if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+ if (_jitc->function->call.call & jit_call_varargs) {
+ assert(sizeof(jit_float64_t) == sizeof(jit_word_t));
+ jit_stxi_d(_jitc->function->call.size, JIT_SP,
+ JIT_FA0 - _jitc->function->call.argf);
+ _jitc->function->call.size += sizeof(jit_float64_t);
+ }
+#endif
++_jitc->function->call.argf;
}
else {
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size & (sizeof(jit_float64_t) - 1);
+#endif
jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
- _jitc->function->call.size += sizeof(jit_word_t);
+ _jitc->function->call.size += sizeof(jit_float64_t);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_link_prepare();
if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+ if (_jitc->function->call.call & jit_call_varargs) {
+ assert(sizeof(jit_float64_t) == sizeof(jit_word_t));
+ jit_stxi_d(_jitc->function->call.size, JIT_SP,
+ JIT_FA0 - _jitc->function->call.argf);
+ _jitc->function->call.size += sizeof(jit_float64_t);
+ }
+#endif
++_jitc->function->call.argf;
}
else {
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size & (sizeof(jit_float64_t) - 1);
+#endif
jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
- _jitc->function->call.size += sizeof(jit_word_t);
+ _jitc->function->call.size += sizeof(jit_float64_t);
+ jit_check_frame();
}
jit_dec_synth();
}
{
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishr, r0);
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size & (sizeof(jit_word_t) - 1);
+#endif
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
node = jit_callr(r0);
{
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishi, (jit_word_t)i0);
+#if PACKED_STACK
+ _jitc->function->call.size +=
+ _jitc->function->call.size & (sizeof(jit_word_t) - 1);
+#endif
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
node = jit_calli(i0);
_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
{
jit_inc_synth_w(retval_i, r0);
+#if __WORDSIZE == 32
+ jit_movr(r0, JIT_RET);
+#else
jit_extr_i(r0, JIT_RET);
+#endif
jit_dec_synth();
}
+#if __WORDSIZE == 64
void
_jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
{
_jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
{
jit_inc_synth_w(retval_l, r0);
- if (r0 != JIT_RET)
- jit_movr(r0, JIT_RET);
+ jit_movr(r0, JIT_RET);
jit_dec_synth();
}
+#endif
void
_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
jit_node_t *node;
jit_uint8_t *data;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
case_brr(bunord, _d);
case_brd(bunord);
case jit_code_jmpr:
+ jit_check_frame();
jmpr(rn(node->u.w));
break;
case jit_code_jmpi:
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (s26_p(word))
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
jmpi(node->u.w);
+ }
break;
case jit_code_callr:
+ jit_check_frame();
callr(rn(node->u.w));
break;
case jit_code_calli:
+ jit_check_frame();
if (node->flag & jit_flag_node) {
temp = node->u.n;
assert(temp->code == jit_code_label ||
if (temp->flag & jit_flag_patch)
calli(temp->u.w);
else {
- word = calli_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (s26_p(word))
+ word = calli(_jit->pc.w);
+ else
+ word = calli_p(_jit->pc.w);
patch(word, node);
}
}
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
undo.patch_offset = _jitc->patches.offset;
restart_function:
+ compute_framesize();
+ patch_alist(0);
_jitc->again = 0;
prolog(node);
break;
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ undo.func.need_frame = _jitc->function->need_frame;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
_jitc->patches.offset = undo.patch_offset;
+ patch_alist(1);
goto restart_function;
}
/* remember label is defined */
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+# if __WORDSIZE == 64
+ case jit_code_arg_l:
+# endif
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+#if __WORDSIZE == 64
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
+#endif
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_i: case jit_code_getarg_ui:
case jit_code_getarg_l:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+#if __WORDSIZE == 64
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
+#endif
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
+#endif
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
case jit_code_retval_s: case jit_code_retval_us:
case jit_code_retval_i:
+#if __WORDSIZE == 64
case jit_code_retval_ui: case jit_code_retval_l:
+#endif
case jit_code_retval_f: case jit_code_retval_d:
case jit_code_prepare:
case jit_code_finishr: case jit_code_finishi:
stxi_d(i0, rn(r0), rn(r1));
}
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+ jit_int32_t reg;
+ _jitc->framesize = 16; /* always reserved: saved lr (return address) + fp */
+ for (reg = 0; reg < jit_size(iregs); reg++) /* one word per iregs[] entry set in the function regset */
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+ _jitc->framesize += sizeof(jit_word_t);
+
+ for (reg = 0; reg < jit_size(fregs); reg++) /* one double per fregs[] entry set in the function regset */
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+ _jitc->framesize += sizeof(jit_float64_t);
+
+ /* Round the total up to a multiple of 16, to make sure
+ * functions called have a 16 byte aligned stack */
+ _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
static void
_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
{
/*
- * Copyright (C) 2014-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define negr(r0,r1) NEGQ(r1,r0)
# define comr(r0,r1) NOT(r1,r0)
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) CTLZ(r1, r0)
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) CTTZ(r1, r0)
# define addr(r0,r1,r2) ADDQ(r1,r2,r0)
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
# define jmpr(r0) JMP(_R31_REGNO,r0,0)
# define jmpi(i0) _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
# define jmpi_p(i0) _jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
#define callr(r0) _callr(_jit,r0)
}
again = _jit->pc.w; /* AGAIN */
LDQ_L(r0, r1, 0); /* Load r0 locked */
- jump0 = bner(0, r0, r2); /* bne FAIL r0 r2 */
+ jump0 = bner(_jit->pc.w, r0, r2); /* bne FAIL r0 r2 */
movr(r0, r3); /* Move to r0 to attempt to store */
STQ_C(r0, r1, 0); /* r0 is an in/out argument */
jump1 = _jit->pc.w;
jit_unget_reg(r1_reg);
}
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1); /* r0 = ~r1, so leading ones of r1 become leading zeros */
+ clzr(r0, r0); /* count leading zeros of the complement (CTLZ) */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1); /* r0 = ~r1, so trailing ones of r1 become trailing zeros */
+ ctzr(r0, r0); /* count trailing zeros of the complement (CTTZ) */
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_unget_reg(t0);
}
-static void
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
jit_word_t w;
if (_s21_p(d))
BR(_R31_REGNO, d);
else
- (void)jmpi_p(i0);
+ w = jmpi_p(i0);
+ return (w);
}
static jit_word_t
/*
- * Copyright (C) 2014-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 168
+#define JIT_INSTR_MAX 88
0, /* data */
0, /* live */
- 4, /* align */
+ 12, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
20, /* va_start */
24, /* va_arg */
44, /* va_arg_d */
32, /* movi */
4, /* movnr */
4, /* movzr */
+ 32, /* casr */
+ 60, /* casi */
8, /* extr_c */
8, /* extr_uc */
8, /* extr_s */
8, /* extr_us */
8, /* extr_i */
8, /* extr_ui */
+ 16, /* bswapr_us */
+ 36, /* bswapr_ui */
+ 36, /* bswapr_ul */
16, /* htonr_us */
36, /* htonr_ui */
36, /* htonr_ul */
16, /* bxsubr_u */
16, /* bxsubi_u */
4, /* jmpr */
- 36, /* jmpi */
+ 4, /* jmpi */
8, /* callr */
36, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 16, /* bswapr_us */
- 36, /* bswapr_ui */
- 36, /* bswapr_ul */
- 32, /* casr */
- 60, /* casi */
+ 8, /* clo */
+ 4, /* clz */
+ 8, /* cto */
+ 4, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2014-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u);
+ jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
return (jit_arg_f_reg_p(u->u.w));
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function != NULL);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += 8;
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, _A0 - v->u.w);
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, _A0 - v->u.w);
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, _A0 - v->u.w);
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, _A0 - v->u.w);
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_i(u, _A0 - v->u.w);
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, _A0 - v->u.w);
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, _A0 - v->u.w);
}
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(_A0 - v->u.w, u);
else
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(_A0 - v->u.w, u);
else {
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function != NULL);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(_A0 - _jitc->function->call.argi, u);
}
void
-_jit_pushargi(jit_state_t *_jit, jit_int64_t u)
+_jit_pushargi(jit_state_t *_jit, jit_int64_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function != NULL);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(_A0 - _jitc->function->call.argi, u);
jit_node_t *node;
jit_uint8_t *data;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
break;
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(lt,);
case_rrw(lt,);
case_rrr(lt, _u);
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (_s21_p(word))
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i: case jit_code_arg_l:
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_i: case jit_code_getarg_ui:
case jit_code_getarg_l:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
# define jit_armv6_p() (jit_cpu.version >= 6)
# define jit_armv7_p() (jit_cpu.version >= 7)
-# define jit_armv7r_p() 0
-# define stack_framesize 48
+# define jit_armv7r_p() (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend))
extern int __aeabi_idivmod(int, int);
extern unsigned __aeabi_uidivmod(unsigned, unsigned);
# define _R0_REGNO 0x00
# define THUMB2_UMULL 0xfba00000
# define ARM_SMULL 0x00c00090
# define THUMB2_SMULL 0xfb800000
+/* >> ARMv7r */
+# define ARM_SDIV 0x07100010
+# define ARM_UDIV 0x07300010
# define THUMB2_SDIV 0xfb90f0f0
# define THUMB2_UDIV 0xfbb0f0f0
+/* << ARMv7r */
# define ARM_AND 0x00000000
# define THUMB_AND 0x4000
# define THUMB2_AND 0xea000000
# define ARM_STREX 0x01800090
# define THUMB2_STREX 0xe8400000
/* << ARMv6* */
+/* >> ARMv6t2 */
+# define THUMB2_CLZ 0xfab0f080
+# define THUMB2_RBIT 0xfa90f0a0
+# define ARM_RBIT 0x06f00030
+/* << ARMv6t2 */
+# define ARM_CLZ 0x01600010
/* >> ARMv7 */
# define ARM_DMB 0xf57ff050
# define THUMB2_DMB 0xf3bf8f50
# define NOT(rd,rm) CC_NOT(ARM_CC_AL,rd,rm)
# define T1_NOT(rd,rm) T1_MVN(rd,rm)
# define T2_NOT(rd,rm) T2_MVN(rd,rm)
+# define T2_CLZ(rd,rm) torrr(THUMB2_CLZ,rm,rd,rm)
+# define CC_CLZ(cc,rd,rm) corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm)
+# define CLZ(rd,rm) CC_CLZ(ARM_CC_AL,rd,rm)
+# define T2_RBIT(rd,rm) torrr(THUMB2_RBIT,rm,rd,rm)
+# define CC_RBIT(cc,rd,rm) corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm)
+# define RBIT(rd,rm) CC_RBIT(ARM_CC_AL,rd,rm)
# define NOP() MOV(_R0_REGNO, _R0_REGNO)
# define T1_NOP() is(0xbf00)
# define CC_ADD(cc,rd,rn,rm) corrr(cc,ARM_ADD,rn,rd,rm)
# define CC_UMULL(cc,rl,rh,rn,rm) corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
# define UMULL(rl,rh,rn,rm) CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
# define T2_UMULL(rl,rh,rn,rm) torrrr(THUMB2_UMULL,rn,rl,rh,rm)
+# define CC_SDIV(cc,rd,rn,rm) corrrr(cc,ARM_SDIV,rd,15,rn,rm)
+# define SDIV(rd,rn,rm) CC_SDIV(ARM_CC_AL,rd,rm,rn)
+# define CC_UDIV(cc,rd,rn,rm) corrrr(cc,ARM_UDIV,rd,15,rn,rm)
+# define UDIV(rd,rn,rm) CC_UDIV(ARM_CC_AL,rd,rm,rn)
# define T2_SDIV(rd,rn,rm) torrr(THUMB2_SDIV,rn,rd,rm)
# define T2_UDIV(rd,rn,rm) torrr(THUMB2_UDIV,rn,rd,rm)
# define CC_AND(cc,rd,rn,rm) corrr(cc,ARM_AND,rn,rd,rm)
# define T2_POP(im) tpp(THUMB2_POP,im)
# define jit_get_reg_args() \
do { \
+ CHECK_REG_ARGS(); \
+ jit_check_frame(); \
(void)jit_get_reg(_R0|jit_class_named|jit_class_gpr); \
(void)jit_get_reg(_R1|jit_class_named|jit_class_gpr); \
(void)jit_get_reg(_R2|jit_class_named|jit_class_gpr); \
static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
# define negr(r0,r1) _negr(_jit,r0,r1)
static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define addr(r0,r1,r2) _addr(_jit,r0,r1,r2)
static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _epilog(jit_state_t*,jit_node_t*);
# define callr(r0) _callr(_jit,r0)
static void _callr(jit_state_t*,jit_int32_t);
-# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
-# define calli_p(i0) _calli_p(_jit,i0)
-static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+# define calli(i0,i1) _calli(_jit,i0,i1)
+static void _calli(jit_state_t*,jit_word_t,jit_bool_t);
+# define calli_p(i0,i1) _calli_p(_jit,i0,i1)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t);
# define vastart(r0) _vastart(_jit, r0)
static void _vastart(jit_state_t*, jit_int32_t);
# define vaarg(r0, r1) _vaarg(_jit, r0, r1)
assert(!(o & 0x0000ffff));
if (o == THUMB2_PUSH)
assert(!(im & 0x8000));
- assert(__builtin_popcount(im & 0x1fff) > 1);
+ assert(__builtin_popcount(im & 0x7fff) > 1);
thumb.i = o|im;
iss(thumb.s[0], thumb.s[1]);
}
RSBI(r0, r1, 0);
}
+/* Emit code that counts the leading zero bits of r1 and stores the
+ * count in r0.
+ * In ARM mode, when jit_armv5e_p() holds, the CLZ instruction is used;
+ * in thumb mode, when jit_armv7_p() holds, its Thumb-2 encoding is used.
+ * Otherwise the generic fallback_clz() sequence is emitted.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ if (!jit_thumb_p() && jit_armv5e_p())
+ CLZ(r0, r1);
+ else if (jit_thumb_p() && jit_armv7_p()) { /* armv6t2 actually */
+ T2_CLZ(r0, r1);
+ }
+ else
+ /* Source operand is r1; r0 is only the destination. */
+ fallback_clz(r0, r1);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* Count leading ones of r1 into r0, computed as clz(~r1). */
+ comr(r0, r1); /* r0 = one's complement of r1 */
+ clzr(r0, r0); /* leading zeros of the complement == leading ones of r1 */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* Count trailing ones of r1 into r0. */
+ if (jit_armv7_p()) { /* armv6t2 actually */
+ if (jit_thumb_p())
+ T2_RBIT(r0, r1); /* Thumb-2 encoding of RBIT: r0 = bit-reversed r1 */
+ else
+ RBIT(r0, r1); /* ARM encoding of RBIT: r0 = bit-reversed r1 */
+ clor(r0, r0); /* after reversal, trailing ones have become leading ones */
+ }
+ else
+ fallback_cto(r0, r1); /* RBIT path not selected: generic sequence */
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* Count trailing zeros of r1 into r0. */
+ if (jit_armv7_p()) { /* armv6t2 actually */
+ if (jit_thumb_p())
+ T2_RBIT(r0, r1); /* Thumb-2 encoding of RBIT: r0 = bit-reversed r1 */
+ else
+ RBIT(r0, r1); /* ARM encoding of RBIT: r0 = bit-reversed r1 */
+ clzr(r0, r0); /* after reversal, trailing zeros have become leading zeros */
+ }
+ else
+ fallback_ctz(r0, r1); /* RBIT path not selected: generic sequence */
+}
+
static void
_addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
static void
_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- if (jit_armv7r_p() && jit_thumb_p())
- T2_SDIV(r0, r1, r2);
+ if (jit_armv7r_p()) {
+ if (jit_thumb_p())
+ T2_SDIV(r0, r1, r2);
+ else
+ SDIV(r0, r1, r2);
+ }
else
divrem(1, 1, r0, r1, r2);
}
static void
_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- if (jit_armv7r_p() && jit_thumb_p())
- T2_UDIV(r0, r1, r2);
+ if (jit_armv7r_p()) {
+ if (jit_thumb_p())
+ T2_UDIV(r0, r1, r2);
+ else
+ UDIV(r0, r1, r2);
+ }
else
divrem(1, 0, r0, r1, r2);
}
static void
_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- divrem(0, 1, r0, r1, r2);
+ if (jit_armv7r_p()) {
+ jit_int32_t reg;
+ if (r0 == r1 || r0 == r2) {
+ reg = jit_get_reg(jit_class_gpr);
+ divr(rn(reg), r1, r2);
+ mulr(rn(reg), r2, rn(reg));
+ subr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+ else {
+ divr(r0, r1, r2);
+ mulr(r0, r2, r0);
+ subr(r0, r1, r0);
+ }
+ }
+ else
+ divrem(0, 1, r0, r1, r2);
}
static void
static void
_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- divrem(0, 0, r0, r1, r2);
+ if (jit_armv7r_p()) {
+ jit_int32_t reg;
+ if (r0 == r1 || r0 == r2) {
+ reg = jit_get_reg(jit_class_gpr);
+ divr_u(rn(reg), r1, r2);
+ mulr(rn(reg), r2, rn(reg));
+ subr(r0, r1, rn(reg));
+ jit_unget_reg(reg);
+ }
+ else {
+ divr_u(r0, r1, r2);
+ mulr(r0, r2, r0);
+ subr(r0, r1, r0);
+ }
+ }
+ else
+ divrem(0, 0, r0, r1, r2);
}
static void
jit_word_t w;
jit_word_t d;
jit_int32_t reg;
+ /* i1 means jump is reachable in signed 24 bits */
if (i1) {
- /* Assume jump is not longer than 23 bits if inside jit */
w = _jit->pc.w;
/* if thumb and in thumb mode */
if (jit_thumb_p() && _jitc->thumb) {
}
static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p)
{
jit_word_t d;
jit_int32_t reg;
- d = ((i0 - _jit->pc.w) >> 2) - 2;
- if (!jit_exchange_p() && !jit_thumb_p() && _s24P(d))
- BLI(d & 0x00ffffff);
+ if (!exchange_p) {
+ if (jit_thumb_p()) {
+ if (jit_exchange_p())
+ /* skip switch from arm to thumb
+ * exchange_p set to zero means a jit function
+ * call in the same jit code buffer */
+ d = ((i0 + 8 - _jit->pc.w) >> 1) - 2;
+ else
+ d = ((i0 - _jit->pc.w) >> 1) - 2;
+ }
+ else d = ((i0 - _jit->pc.w) >> 2) - 2;
+ if (_s24P(d)) {
+ if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d));
+ else BLI(d & 0x00ffffff);
+ }
+ else goto fallback;
+ }
else {
+ fallback:
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
if (jit_thumb_p())
}
static jit_word_t
-_calli_p(jit_state_t *_jit, jit_word_t i0)
+_calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
{
jit_word_t w;
+ jit_word_t d;
jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- w = _jit->pc.w;
- movi_p(rn(reg), i0);
- if (jit_thumb_p())
- T1_BLX(rn(reg));
- else
- BLX(rn(reg));
- jit_unget_reg(reg);
+ /* i1 means call is reachable in signed 24 bits */
+ if (i1) {
+ w = _jit->pc.w;
+ if (jit_thumb_p()) d = ((i0 - _jit->pc.w) >> 1) - 2;
+ else d = ((i0 - _jit->pc.w) >> 2) - 2;
+ assert(_s24P(d));
+ if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d));
+ else BLI(d & 0x00ffffff);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ w = _jit->pc.w;
+ movi_p(rn(reg), i0);
+ if (jit_thumb_p())
+ T1_BLX(rn(reg));
+ else
+ BLX(rn(reg));
+ jit_unget_reg(reg);
+ }
return (w);
}
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, mask, count;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
+ if (jit_swf_p())
+ CHECK_SWF_OFFSET();
+ CHECK_REG_ARGS();
if (_jitc->function->assume_frame) {
if (jit_thumb_p() && !_jitc->thumb)
_jitc->thumb = _jit->pc.w;
_jitc->function->stack = ((_jitc->function->self.alen -
/* align stack at 8 bytes */
_jitc->function->self.aoff) + 7) & -8;
+ /* If this jit_check_frame() succeeds, it actually is just a need_stack,
+ * usually for arguments, so, allocai was not called, but pusharg*
+ * was called increasing stack size, for negative access offsets.
+ * This can be optimized for one less prolog instruction, that is,
+ * do not create the frame pointer, and only add _jitc->function->stack
+ * to sp, and on epilog, instead of moving fp to sp, just add negative
+ * value of _jitc->function->stack. Since this condition requires a
+ * large function body for excess arguments to called function, keep
+ * things a bit simpler for now, as this is the only place need_stack
+ * would be useful. */
+ if (_jitc->function->stack)
+ jit_check_frame();
+
+ for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ mask |= 1 << rn(iregs[reg]);
+ ++count;
+ }
+ }
+ /* One extra register to keep stack 8 bytes aligned */
+ if (count & 1) {
+ for (reg = 4; reg < 10; reg++) {
+ if (!(mask & (1 << reg))) {
+ mask |= 1 << reg;
+ break;
+ }
+ }
+ }
+ if (_jitc->function->need_frame || _jitc->function->need_return)
+ mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+ if (!jit_swf_p() && _jitc->function->save_reg_args &&
+ !(_jitc->function->self.call & jit_call_varargs))
+ mask |= 0xf;
if (jit_thumb_p()) {
/* switch to thumb mode (better approach would be to
* ORR 1 address being called, but no clear distinction
* of what is a pointer to a jit function, or if patching
* a pointer to a jit function) */
- ADDI(_R12_REGNO, _R15_REGNO, 1);
- BX(_R12_REGNO);
+ if (jit_exchange_p()) {
+ ADDI(_R12_REGNO, _R15_REGNO, 1);
+ BX(_R12_REGNO);
+ }
if (!_jitc->thumb)
_jitc->thumb = _jit->pc.w;
- if (jit_cpu.abi) {
- T2_PUSH(0xf);
- T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- VPUSH_F64(_D8_REGNO, 8);
- }
- else {
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
T2_PUSH(0xf);
- T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- }
+ if (mask)
+ T2_PUSH(mask);
}
else {
- if (jit_cpu.abi) {
- PUSH(0xf);
- PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- VPUSH_F64(_D8_REGNO, 8);
- }
- else {
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
PUSH(0xf);
- PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- }
+ if (mask)
+ PUSH(mask);
}
- movr(_FP_REGNO, _SP_REGNO);
+ if (_jitc->function->need_frame)
+ movr(_FP_REGNO, _SP_REGNO);
if (_jitc->function->stack)
subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, mask, count;
if (_jitc->function->assume_frame)
return;
- movr(_SP_REGNO, _FP_REGNO);
- if (jit_cpu.abi)
- VPOP_F64(_D8_REGNO, 8);
- if (jit_thumb_p())
- T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- else
- POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
- addi(_SP_REGNO, _SP_REGNO, 16);
+ for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ mask |= 1 << rn(iregs[reg]);
+ ++count;
+ }
+ }
+ /* One extra register to keep stack 8 bytes aligned */
+ if (count & 1) {
+ for (reg = 4; reg < 10; reg++) {
+ if (!(mask & (1 << reg))) {
+ mask |= 1 << reg;
+ break;
+ }
+ }
+ }
+ if (_jitc->function->need_frame || _jitc->function->need_return)
+ mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+ if (_jitc->function->need_frame)
+ movr(_SP_REGNO, _FP_REGNO);
+ if (!jit_swf_p() && _jitc->function->save_reg_args &&
+ !(_jitc->function->self.call & jit_call_varargs))
+ addi(_SP_REGNO, _SP_REGNO, 16);
+ if (mask) {
+ if (jit_thumb_p())
+ T2_POP(mask);
+ else
+ POP(mask);
+ }
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
+ addi(_SP_REGNO, _SP_REGNO, 16);
if (jit_thumb_p())
T1_BX(_LR_REGNO);
else
* The -16 is to account for the 4 argument registers
* always saved, and _jitc->function->vagp is to account
* for declared arguments. */
- addi(r0, _FP_REGNO, _jitc->function->self.size -
- 16 + _jitc->function->vagp);
+ addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp);
}
static void
jit_word_t w;
} u;
u.w = instr;
- if (kind == arm_patch_jump) {
+ if (kind == arm_patch_call) {
+ if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
+ code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+ assert((thumb.i & THUMB2_BLI) == THUMB2_BLI);
+ /* skip code to switch from arm to thumb mode */
+ if (jit_exchange_p())
+ d = ((label + 8 - instr) >> 1) - 2;
+ else
+ d = ((label - instr) >> 1) - 2;
+ assert(_s24P(d));
+ thumb.i = THUMB2_BLI | encode_thumb_jump(d);
+ thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+ }
+ else {
+ thumb.i = u.i[0];
+ assert((thumb.i & 0x0f000000) == ARM_BLI);
+ d = ((label - instr) >> 2) - 2;
+ assert(_s24P(d));
+ u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
+ }
+ }
+ else if (kind == arm_patch_jump) {
if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
if ((thumb.i & THUMB2_B) == THUMB2_B) {
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
jit_int32_t r0, jit_int32_t r1)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
else
jit_int32_t r0, jit_int32_t r1)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
else
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_float32_t f;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.f = i1;
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_float32_t f;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.f = i0;
movi(_R0_REGNO, data.i);
if (jit_fpr_p(r1))
jit_float64_t d;
} data;
jit_get_reg_args();
-
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.d = i1;
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
jit_float64_t d;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.d = i0;
movi(_R0_REGNO, data.i[0]);
movi(_R1_REGNO, data.i[1]);
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_get_reg_args();
+ if (jit_fpr_p(r1) || jit_fpr_p(r2))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
else
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_get_reg_args();
+ if (jit_fpr_p(r1) || jit_fpr_p(r2))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_float32_t f;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.f = i1;
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_float64_t d;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.d = i1;
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
{
jit_word_t instr;
jit_get_reg_args();
+ if (jit_fpr_p(r1) || jit_fpr_p(r2))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
else
{
jit_word_t instr;
jit_get_reg_args();
+ if (jit_fpr_p(r1) || jit_fpr_p(r2))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_float32_t f;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.f = i1;
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
jit_float64_t d;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
data.d = i1;
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
{
jit_word_t w, d;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
else
{
jit_word_t w, d;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
} data;
jit_word_t w, d;
jit_get_reg_args();
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
data.f = i2;
if (jit_fpr_p(r0))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
jit_float64_t d;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
data.d = i2;
if (jit_fpr_p(r0)) {
if (!jit_thumb_p() && jit_armv5e_p())
{
jit_word_t w, d, j0, j1;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
else
{
jit_word_t w, d, j0, j1;
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
jit_word_t w, d, j0, j1;
data.f = i1;
jit_get_reg_args();
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
else
jit_float64_t d;
} data;
jit_get_reg_args();
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
data.d = i1;
if (jit_fpr_p(r0)) {
if (!jit_thumb_p() && jit_armv5e_p())
_swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
movr(_R0_REGNO, r1);
swf_call(__aeabi_i2f, i2f, _R1_REGNO);
if (jit_fpr_p(r0))
_swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
movr(_R0_REGNO, r1);
swf_call(__aeabi_i2d, i2d, _R2_REGNO);
if (jit_fpr_p(r0)) {
_swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
_swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_get_reg_args();
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
else
jit_word_t slow_not_nan;
#endif
jit_get_reg_args();
+ if (jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1))
swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
else
jit_word_t slow_not_nan;
#endif
jit_get_reg_args();
+ if (jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p())
LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
{
jit_int32_t reg;
if (r0 != r1) {
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
{
jit_int32_t reg;
if (r0 != r1) {
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (!jit_thumb_p() && jit_armv5e_p() &&
(reg = jit_get_reg_pair()) != JIT_NOREG) {
jit_float32_t f;
} data;
jit_int32_t reg;
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
data.f = i0;
if (jit_fpr_p(r0)) {
reg = jit_get_reg(jit_class_gpr);
jit_int32_t i[2];
jit_float64_t d;
} data;
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
data.d = i0;
if (jit_fpr_p(r0)) {
if (!jit_thumb_p() && jit_armv5e_p() &&
_swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
_swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
_swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
_swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
+ if (jit_fpr_p(r0) || jit_fpr_p(r1))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r1)) {
if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
{
jit_int32_t reg;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
ldxi_i(rn(reg), r1, 0);
swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
{
jit_int32_t reg;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
(reg = jit_get_reg_pair()) != JIT_NOREG) {
LDRDI(rn(reg), r1, 0);
{
jit_int32_t reg;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
ldi_i(rn(reg), i0);
swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
_swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t rg0, rg1;
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
(rg0 = jit_get_reg_pair()) != JIT_NOREG) {
movi(rn(rg0), i0);
{
jit_int32_t reg;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
ldxr_i(rn(reg), r1, r2);
swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
{
jit_int32_t rg0, rg1;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
(rg0 = jit_get_reg_pair()) != JIT_NOREG) {
LDRD(rn(rg0), r1, r2);
_swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
+ if (jit_fpr_p(r0))
+ CHECK_SWF_OFFSET();
if (jit_fpr_p(r0)) {
reg = jit_get_reg(jit_class_gpr);
ldxi_i(rn(reg), r1, i0);
{
jit_int32_t rg0, rg1;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
(rg0 = jit_get_reg_pair()) != JIT_NOREG) {
{
jit_int32_t reg;
if (jit_fpr_p(r1)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
stxi_i(0, r0, rn(reg));
{
jit_int32_t reg;
if (jit_fpr_p(r1)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
(reg = jit_get_reg_pair()) != JIT_NOREG) {
LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
{
jit_int32_t reg;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
sti_i(i0, rn(reg));
{
jit_int32_t rg0, rg1;
if (jit_fpr_p(r0)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
(rg0 = jit_get_reg_pair()) != JIT_NOREG) {
rg1 = jit_get_reg(jit_class_gpr);
{
jit_int32_t reg;
if (jit_fpr_p(r2)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r2) + 8);
stxr_i(r1, r0, rn(reg));
{
jit_int32_t rg0, rg1;
if (jit_fpr_p(r2)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
(rg0 = jit_get_reg_pair()) != JIT_NOREG) {
LDRDIN(rn(rg0), _FP_REGNO, swf_off(r2) + 8);
{
jit_int32_t reg;
if (jit_fpr_p(r1)) {
+ CHECK_SWF_OFFSET();
reg = jit_get_reg(jit_class_gpr);
swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
stxi_i(i0, r0, rn(reg));
{
jit_int32_t rg0, rg1;
if (jit_fpr_p(r1)) {
+ CHECK_SWF_OFFSET();
if (!jit_thumb_p() && jit_armv5e_p() &&
((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
(rg0 = jit_get_reg_pair()) != JIT_NOREG) {
#if __WORDSIZE == 32
#if defined(__ARM_PCS_VFP)
-#define JIT_INSTR_MAX 48
+#define JIT_INSTR_MAX 50
0, /* data */
0, /* live */
- 2, /* align */
+ 14, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
2, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
16, /* va_arg_d */
8, /* movi */
8, /* movnr */
8, /* movzr */
+ 42, /* casr */
+ 50, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 8, /* bswapr_us */
+ 4, /* bswapr_ui */
+ 0, /* bswapr_ul */
8, /* htonr_us */
4, /* htonr_ui */
0, /* htonr_ul */
4, /* callr */
20, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
8, /* extr_d */
4, /* extr_f_d */
4, /* movr_d */
- 16, /* movi_d */
+ 32, /* movi_d */
4, /* ldr_d */
12, /* ldi_d */
8, /* ldxr_d */
12, /* bler_d */
28, /* blei_d */
12, /* beqr_d */
- 28, /* beqi_d */
+ 36, /* beqi_d */
12, /* bger_d */
28, /* bgei_d */
12, /* bgtr_d */
12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 8, /* bswapr_us */
- 4, /* bswapr_ui */
- 0, /* bswapr_ul */
- 40, /* casr */
- 48, /* casi */
+ 8, /* clo */
+ 4, /* clz */
+ 12, /* cto */
+ 8, /* ctz */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
#if __WORDSIZE == 32
#if !defined(__ARM_PCS_VFP)
-#define JIT_INSTR_MAX 160
+#define JIT_INSTR_MAX 50
0, /* data */
0, /* live */
- 2, /* align */
+ 18, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
2, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
28, /* va_arg_d */
8, /* movi */
8, /* movnr */
8, /* movzr */
+ 42, /* casr */
+ 46, /* casi */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
8, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 20, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
20, /* htonr_us */
16, /* htonr_ui */
0, /* htonr_ul */
4, /* callr */
20, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* retval_i */
0, /* retval_ui */
0, /* retval_l */
- 160, /* epilog */
+ 30, /* epilog */
0, /* arg_f */
0, /* getarg_f */
0, /* putargr_f */
28, /* extr_f */
22, /* extr_d_f */
8, /* movr_f */
- 12, /* movi_f */
+ 16, /* movi_f */
8, /* ldr_f */
16, /* ldi_f */
8, /* ldxr_f */
28, /* bler_f */
32, /* blei_f */
28, /* beqr_f */
- 40, /* beqi_f */
+ 48, /* beqi_f */
28, /* bger_f */
32, /* bgei_f */
28, /* bgtr_f */
72, /* unordi_d */
20, /* truncr_d_i */
0, /* truncr_d_l */
- 28, /* extr_d */
+ 36, /* extr_d */
22, /* extr_f_d */
16, /* movr_d */
- 20, /* movi_d */
+ 32, /* movi_d */
16, /* ldr_d */
24, /* ldi_d */
20, /* ldxr_d */
12, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 20, /* bswapr_us */
- 16, /* bswapr_ui */
- 0, /* bswapr_ul */
- 40, /* casr */
- 44, /* casi */
+ 8, /* clo */
+ 4, /* clz */
+ 12, /* cto */
+ 8, /* ctz */
#endif /* __ARM_PCS_VFP */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
if (jit_fpr_p(r0)) {
/* float arguments are packed, for others,
* lightning only address even registers */
- if (!(r0 & 1) && (r0 - 16) >= 0 &&
+ if (!(r0 & 1) && (r0 - 32) >= 0 &&
((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
(code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
VIMM(code, r0);
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# include <stdio.h>
#endif
+#define stack_framesize 48
+
#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4)
#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 16)
#define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 15)
#define arm_patch_node 0x80000000
#define arm_patch_word 0x40000000
#define arm_patch_jump 0x20000000
-#define arm_patch_load 0x00000000
+#define arm_patch_load 0x10000000
+#define arm_patch_call 0x08000000
#define jit_fpr_p(rn) ((rn) > 15)
-#define arg_base() \
- (stack_framesize - 16 + (jit_cpu.abi ? 64 : 0))
+#define arg_base() (stack_framesize - 16)
#define arg_offset(n) \
((n) < 4 ? arg_base() + ((n) << 2) : (n))
* arm mode, what may cause a crash upon return of that function
* if generating jit for a relative jump.
*/
-#define jit_exchange_p() 1
+#define jit_exchange_p() jit_cpu.exchange
/* FIXME is it really required to not touch _R10? */
+#define CHECK_REG_ARGS() \
+ do { /* first use sets save_reg_args and _jitc->again */ \
+ if (!_jitc->function->save_reg_args) /* later uses: no-op */ \
+ _jitc->again = _jitc->function->save_reg_args = 1; \
+ } while (0)
+/* CHECK_SWF_OFFSET(): on first use, set swf_offset together with
+ * save_reg_args and _jitc->again, and set self.aoff to -64.
+ * NOTE(review): the 64 bytes are presumably the soft-float register
+ * area -- confirm against swf_off(). */
+#define CHECK_SWF_OFFSET() \
+ do { \
+ if (!_jitc->function->swf_offset) { \
+ _jitc->again = _jitc->function->save_reg_args = \
+ _jitc->function->swf_offset = 1; \
+ _jitc->function->self.aoff = -64; \
+ } \
+ } while (0)
+/* CHECK_RETURN(): when the function has neither need_frame nor
+ * need_return set, set need_return and _jitc->again. */
+#define CHECK_RETURN() \
+ do { \
+ if (!_jitc->function->need_frame && \
+ !_jitc->function->need_return) \
+ _jitc->again = _jitc->function->need_return = 1; \
+ } while (0)
+
/*
* Types
*/
/*
* Prototypes
*/
-#define jit_make_arg(node) _jit_make_arg(_jit,node)
-static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
#define jit_make_arg_f(node) _jit_make_arg_f(_jit,node)
static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
#define jit_make_arg_d(node) _jit_make_arg_d(_jit,node)
static void _flush_consts(jit_state_t*);
#define invalidate_consts() _invalidate_consts(_jit)
static void _invalidate_consts(jit_state_t*);
-#define patch(instr, node) _patch(_jit, instr, node)
-static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+#define compute_framesize() _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
+#define patch(instr, node, kind) _patch(_jit, instr, node, kind)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*,jit_int32_t);
#if defined(__GNUC__)
/* libgcc */
{ _NOREG, "<none>" },
};
+static jit_int32_t iregs[] = {
+ _R4, _R5, _R6, _R7, _R8, _R9,
+};
+
/*
* Implementation
*/
/* armv6t2 todo (software float and thumb2) */
if (!jit_cpu.vfp && jit_cpu.thumb)
jit_cpu.thumb = 0;
+ /* FIXME need test environments for the below. For the moment just
+ * be very conservative */
+ /* force generation of code that assumes the jit and the library
+ * functions it calls do not use the same instruction set */
+ jit_cpu.exchange = 1;
+ /* do not generate hardware integer division by default */
+ if (jit_cpu.version == 7)
+ jit_cpu.extend = 0;
}
void
}
_jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
_jitc->function->self.size = stack_framesize;
- if (jit_cpu.abi)
- _jitc->function->self.size += 64;
_jitc->function->self.argi = _jitc->function->self.argf =
- _jitc->function->self.alen = 0;
- if (jit_swf_p())
- /* 8 soft float registers */
- _jitc->function->self.aoff = -64;
- else
- _jitc->function->self.aoff = 0;
+ _jitc->function->self.alen = _jitc->function->self.aoff = 0;
+ _jitc->function->swf_offset = _jitc->function->save_reg_args =
+ _jitc->function->need_return = 0;
_jitc->function->self.call = jit_call_default;
jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
_jitc->reglen * sizeof(jit_int32_t));
_jit_allocai(jit_state_t *_jit, jit_int32_t length)
{
assert(_jitc->function);
+ if (jit_swf_p())
+ CHECK_SWF_OFFSET();
+ jit_check_frame();
switch (length) {
case 0: case 1: break;
case 2: _jitc->function->self.aoff &= -2; break;
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u);
+ jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code != jit_code_arg) {
+ if (!(u->code >= jit_code_arg_c && u->code <= jit_code_arg)) {
if (u->code == jit_code_arg_f) {
if (jit_cpu.abi)
return (jit_arg_f_reg_p(u->u.w));
}
static jit_node_t *
-_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
{
jit_int32_t offset;
if (jit_arg_reg_p(_jitc->function->self.argi))
_jitc->function->self.size += sizeof(jit_word_t);
}
if (node == (jit_node_t *)0)
- node = jit_new_node(jit_code_arg);
+ node = jit_new_node(code);
else
link_node(node);
node->u.w = offset;
else {
assert(!(_jitc->function->self.call & jit_call_varargs));
_jitc->function->self.call |= jit_call_varargs;
+ CHECK_REG_ARGS();
if (jit_cpu.abi && _jitc->function->self.argf)
rewind_prolog();
- /* First 4 stack addresses are always spilled r0-r3 */
+ /* First 4 stack addresses need to be spilled r0-r3 */
if (jit_arg_reg_p(_jitc->function->self.argi))
_jitc->function->vagp = _jitc->function->self.argi * 4;
else
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
assert(_jitc->function);
- return (jit_make_arg((jit_node_t*)0));
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+ return (jit_make_arg((jit_node_t*)0, code));
}
jit_node_t *
_jit_arg_f(jit_state_t *_jit)
{
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
return (jit_make_arg_f((jit_node_t*)0));
}
_jit_arg_d(jit_state_t *_jit)
{
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
return (jit_make_arg_d((jit_node_t*)0));
}
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ jit_node_t *node = NULL;
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_swf_p())
- jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w));
else if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, JIT_RA0 - v->u.w);
else
- jit_ldxi_c(u, JIT_FP, v->u.w);
+ node = jit_ldxi_c(u, JIT_FP, v->u.w);
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ jit_node_t *node = NULL;
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_swf_p())
- jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w));
else if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, JIT_RA0 - v->u.w);
else
- jit_ldxi_uc(u, JIT_FP, v->u.w);
+ node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ jit_node_t *node = NULL;
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_swf_p())
- jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w));
else if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, JIT_RA0 - v->u.w);
else
- jit_ldxi_s(u, JIT_FP, v->u.w);
+ node = jit_ldxi_s(u, JIT_FP, v->u.w);
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ jit_node_t *node = NULL;
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_swf_p())
- jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w));
else if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, JIT_RA0 - v->u.w);
else
- jit_ldxi_us(u, JIT_FP, v->u.w);
+ node = jit_ldxi_us(u, JIT_FP, v->u.w);
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ jit_node_t *node = NULL;
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_swf_p())
- jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w));
else if (jit_arg_reg_p(v->u.w))
jit_movr(u, JIT_RA0 - v->u.w);
else
- jit_ldxi_i(u, JIT_FP, v->u.w);
+ node = jit_ldxi_i(u, JIT_FP, v->u.w);
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ jit_node_t *node = NULL;
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_swf_p())
- jit_stxi(arg_offset(v->u.w), JIT_FP, u);
+ node = jit_stxi(arg_offset(v->u.w), JIT_FP, u);
else if (jit_arg_reg_p(v->u.w))
jit_movr(JIT_RA0 - v->u.w, u);
else
- jit_stxi(v->u.w, JIT_FP, u);
+ node = jit_stxi(v->u.w, JIT_FP, u);
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
- jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ jit_int32_t regno;
+ jit_node_t *node = NULL;
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_swf_p()) {
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(arg_offset(v->u.w), JIT_FP, regno);
+ node = jit_stxi(arg_offset(v->u.w), JIT_FP, regno);
jit_unget_reg(regno);
}
else if (jit_arg_reg_p(v->u.w))
else {
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(v->u.w, JIT_FP, regno);
+ node = jit_stxi(v->u.w, JIT_FP, regno);
jit_unget_reg(regno);
}
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
+ jit_node_t *node = NULL;
assert(v->code == jit_code_arg_f);
jit_inc_synth_wp(getarg_f, u, v);
if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
if (jit_arg_f_reg_p(v->u.w))
jit_movr_f(u, JIT_FA0 - v->u.w);
else
- jit_ldxi_f(u, JIT_FP, v->u.w);
+ node = jit_ldxi_f(u, JIT_FP, v->u.w);
}
else if (jit_swf_p())
- jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w));
else {
if (jit_arg_reg_p(v->u.w))
jit_movr_w_f(u, JIT_RA0 - v->u.w);
else
- jit_ldxi_f(u, JIT_FP, v->u.w);
+ node = jit_ldxi_f(u, JIT_FP, v->u.w);
+ }
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
}
jit_dec_synth();
}
void
_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
+ jit_node_t *node = NULL;
assert(v->code == jit_code_arg_f);
jit_inc_synth_wp(putargr_f, u, v);
if (jit_cpu.abi) {
if (jit_arg_f_reg_p(v->u.w))
jit_movr_f(JIT_FA0 - v->u.w, u);
else
- jit_stxi_f(v->u.w, JIT_FP, u);
+ node = jit_stxi_f(v->u.w, JIT_FP, u);
}
else if (jit_swf_p())
- jit_stxi_f(arg_offset(v->u.w), JIT_FP, u);
+ node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, u);
else {
if (jit_arg_reg_p(v->u.w))
jit_movr_f_w(JIT_RA0 - v->u.w, u);
else
- jit_stxi_f(v->u.w, JIT_FP, u);
+ node = jit_stxi_f(v->u.w, JIT_FP, u);
+ }
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
}
jit_dec_synth();
}
void
_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
{
- jit_int32_t regno;
+ jit_int32_t regno;
+ jit_node_t *node = NULL;
assert(v->code == jit_code_arg_f);
jit_inc_synth_fp(putargi_f, u, v);
if (jit_cpu.abi) {
else {
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(v->u.w, JIT_FP, regno);
+ node = jit_stxi_f(v->u.w, JIT_FP, regno);
jit_unget_reg(regno);
}
}
else if (jit_swf_p()) {
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno);
+ node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno);
jit_unget_reg(regno);
}
else {
if (jit_arg_reg_p(v->u.w))
jit_movr_f_w(JIT_RA0 - v->u.w, regno);
else
- jit_stxi_f(v->u.w, JIT_FP, regno);
+ node = jit_stxi_f(v->u.w, JIT_FP, regno);
jit_unget_reg(regno);
}
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
+ jit_node_t *node = NULL;
assert(v->code == jit_code_arg_d);
jit_inc_synth_wp(getarg_d, u, v);
if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
if (jit_arg_f_reg_p(v->u.w))
jit_movr_d(u, JIT_FA0 - v->u.w);
else
- jit_ldxi_d(u, JIT_FP, v->u.w);
+ node = jit_ldxi_d(u, JIT_FP, v->u.w);
}
else if (jit_swf_p())
- jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w));
+ node = jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w));
else {
if (jit_arg_reg_p(v->u.w))
jit_movr_ww_d(u, JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1));
else
- jit_ldxi_d(u, JIT_FP, v->u.w);
+ node = jit_ldxi_d(u, JIT_FP, v->u.w);
+ }
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
}
jit_dec_synth();
}
void
_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
+ jit_node_t *node = NULL;
assert(v->code == jit_code_arg_d);
jit_inc_synth_wp(putargr_d, u, v);
if (jit_cpu.abi) {
if (jit_arg_f_reg_p(v->u.w))
jit_movr_d(JIT_FA0 - v->u.w, u);
else
- jit_stxi_d(v->u.w, JIT_FP, u);
+ node = jit_stxi_d(v->u.w, JIT_FP, u);
}
else if (jit_swf_p())
- jit_stxi_d(arg_offset(v->u.w), JIT_FP, u);
+ node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, u);
else {
if (jit_arg_reg_p(v->u.w))
jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), u);
else
- jit_stxi_d(v->u.w, JIT_FP, u);
+ node = jit_stxi_d(v->u.w, JIT_FP, u);
+ }
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
}
jit_dec_synth();
}
void
_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
{
- jit_int32_t regno;
+ jit_int32_t regno;
+ jit_node_t *node = NULL;
assert(v->code == jit_code_arg_d);
jit_inc_synth_dp(putargi_d, u, v);
if (jit_cpu.abi) {
else {
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(v->u.w, JIT_FP, regno);
+ node = jit_stxi_d(v->u.w, JIT_FP, regno);
jit_unget_reg(regno);
}
}
else if (jit_swf_p()) {
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno);
+ node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno);
jit_unget_reg(regno);
}
else {
if (jit_arg_reg_p(v->u.w))
jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), regno);
else
- jit_stxi_d(v->u.w, JIT_FP, regno);
+ node = jit_stxi_d(v->u.w, JIT_FP, regno);
jit_unget_reg(regno);
}
+ if (node) {
+ CHECK_REG_ARGS();
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
jit_node_t *node;
jit_uint8_t *data;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
else { \
word = name##r##type(_jit->pc.w, \
rn(node->v.w), rn(node->w.w)); \
- patch(word, node); \
+ patch(word, node, arm_patch_jump); \
} \
break
#define case_bvv(name, type) \
word = vfp_##name##r##type(_jit->pc.w, \
rn(node->v.w), \
rn(node->w.w)); \
- patch(word, node); \
+ patch(word, node, arm_patch_jump); \
} \
break
#define case_brw(name, type) \
else { \
word = name##i##type(_jit->pc.w, \
rn(node->v.w), node->w.w); \
- patch(word, node); \
+ patch(word, node, arm_patch_jump); \
} \
break;
#define case_bvf(name) \
word = vfp_##name##i_f(_jit->pc.w, \
rn(node->v.w), \
node->w.f); \
- patch(word, node); \
+ patch(word, node, arm_patch_jump); \
} \
break
#define case_bvd(name) \
word = vfp_##name##i_d(_jit->pc.w, \
rn(node->v.w), \
node->w.d); \
- patch(word, node); \
+ patch(word, node, arm_patch_jump); \
} \
break
#if DEVEL_DISASSEMBLER
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ if (jit_thumb_p())
+ nop((node->u.w + 1) & ~1);
+ else
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
if (must_align_p(node->next))
nop(2);
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
word = movi_p(rn(node->u.w), temp->u.w);
- patch(word, node);
+ patch(word, node, arm_patch_word);
}
}
else
case_bvv(bunord, _d);
case_bvd(bunord);
case jit_code_jmpr:
+ jit_check_frame();
jmpr(rn(node->u.w));
flush_consts();
break;
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w, 1);
- patch(word, node);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (jit_thumb_p()) word >>= 1;
+ else word >>= 2;
+ word -= 2;
+ value = _s24P(word);
+ word = jmpi_p(_jit->pc.w, value);
+ patch(word, node, value ?
+ arm_patch_jump : arm_patch_word);
}
}
- else
+ else {
+ jit_check_frame();
jmpi(node->u.w);
+ }
flush_consts();
break;
case jit_code_callr:
+ jit_check_frame();
callr(rn(node->u.w));
break;
case jit_code_calli:
if (node->flag & jit_flag_node) {
+ CHECK_RETURN();
temp = node->u.n;
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
if (temp->flag & jit_flag_patch)
- calli(temp->u.w);
+ calli(temp->u.w, 0);
else {
- word = calli_p(_jit->pc.w);
- patch(word, node);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (jit_exchange_p())
+ word -= 8;
+ if (jit_thumb_p()) word >>= 1;
+ else word >>= 2;
+ word -= 2;
+ value = _s24P(word);
+ word = calli_p(_jit->pc.w, value);
+ patch(word, node, value ?
+ arm_patch_call : arm_patch_word);
}
}
- else
- calli(node->u.w);
+ else {
+ jit_check_frame();
+ calli(node->u.w, jit_exchange_p());
+ }
break;
case jit_code_prolog:
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
#endif
restart_function:
_jitc->again = 0;
+ compute_framesize();
+ patch_alist(0);
prolog(node);
break;
case jit_code_epilog:
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ undo.func.need_frame = _jitc->function->need_frame;
+ undo.func.need_return = _jitc->function->need_return;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ /* swf_offset and save_reg_args must also not be undone */
+ undo.func.swf_offset = _jitc->function->swf_offset;
+ undo.func.save_reg_args = _jitc->function->save_reg_args;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
if (_jitc->data_info.ptr)
_jitc->data_info.offset = undo.info_offset;
#endif
+ patch_alist(1);
goto restart_function;
}
/* remember label is defined */
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_s: case jit_code_getarg_us:
case jit_code_getarg_i:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
node = _jitc->patches.ptr[offset].node;
word = _jitc->patches.ptr[offset].inst;
if (!jit_thumb_p() &&
- (node->code == jit_code_movi || node->code == jit_code_calli)) {
+ (node->code == jit_code_movi ||
+ (node->code == jit_code_calli &&
+ (_jitc->patches.ptr[offset].kind & ~arm_patch_node) ==
+ arm_patch_word))) {
/* calculate where to patch word */
value = *(jit_int32_t *)word;
assert((value & 0x0f700000) == ARM_LDRI);
}
static void
-_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+_compute_framesize(jit_state_t *_jit)
+{
+ jit_int32_t reg;
+ _jitc->framesize = sizeof(jit_word_t) * 2; /* lr+fp */
+ for (reg = 0; reg < jit_size(iregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+ _jitc->framesize += sizeof(jit_word_t);
+
+ if (_jitc->function->save_reg_args)
+ _jitc->framesize += 16;
+
+ /* Make sure called functions have an 8-byte aligned stack */
+ _jitc->framesize = (_jitc->framesize + 7) & -8;
+}
+
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node, jit_int32_t kind)
{
jit_int32_t flag;
- jit_int32_t kind;
assert(node->flag & jit_flag_node);
- if (node->code == jit_code_movi) {
+ if (node->code == jit_code_movi)
flag = node->v.n->flag;
- kind = arm_patch_word;
- }
- else {
+ else
flag = node->u.n->flag;
- if (node->code == jit_code_calli ||
- (node->code == jit_code_jmpi && !(node->flag & jit_flag_node)))
- kind = arm_patch_word;
- else
- kind = arm_patch_jump;
- }
assert(!(flag & jit_flag_patch));
kind |= arm_patch_node;
if (_jitc->patches.offset >= _jitc->patches.length) {
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#endif
#if BINUTILS_2_38
-static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...)
+static int fprintf_styled(void * stream, enum disassembler_style style, const char* fmt, ...)
{
va_list args;
int r;
int line;
char buffer[address_buffer_length];
- sprintf(buffer, address_buffer_format, (long long)addr);
+ sprintf(buffer, address_buffer_format, addr);
(*info->fprintf_func)(info->stream, "0x%s", buffer);
# define _jit disasm_jit
old_line = line;
}
- bytes = sprintf(buffer, address_buffer_format, (long long)pc);
+ bytes = sprintf(buffer, address_buffer_format, pc);
(*disasm_info.fprintf_func)(disasm_stream, "%*c0x%s\t",
16 - bytes, ' ', buffer);
pc += (*disasm_print)(pc, &disasm_info);
#define fallback_casx(r0,r1,r2,r3,im) _fallback_casx(_jit,r0,r1,r2,r3,im)
static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
jit_int32_t, jit_int32_t, jit_word_t);
+#define fallback_clo(r0,r1) _fallback_clo(_jit,r0,r1)
+static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_clz(r0,r1) _fallback_clz(_jit,r0,r1)
+static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_cto(r0,r1) _fallback_cto(_jit,r0,r1)
+static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1)
+static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
+# if defined(__ia64__)
+# define fallback_patch_jmpi(inst,lbl) \
+ do { \
+ sync(); \
+ patch_at(jit_code_jmpi, inst, lbl); \
+ } while (0)
+# else
+# define fallback_patch_jmpi(inst,lbl) fallback_patch_at(inst,lbl)
+# endif
+/* fallback_patch_at(inst,lbl): retarget the branch emitted at inst to lbl,
+ * hiding the per-backend patch_at() signature. Every variant expands to a
+ * single statement WITHOUT a trailing semicolon, so the caller's own ';'
+ * terminates it and the macro stays safe in an unbraced if/else. */
+# if defined(__arm__)
+# define fallback_patch_at(inst,lbl) patch_at(arm_patch_jump,inst,lbl)
+# elif defined(__ia64__)
+# define fallback_patch_at(inst,lbl) \
+ do { \
+ sync(); \
+ patch_at(jit_code_bnei, inst, lbl); \
+ } while (0)
+# else
+# define fallback_patch_at(inst,lbl) patch_at(inst,lbl)
+# endif
+# if defined(__mips__)
+# define fallback_jmpi(i0) jmpi(i0,1)
+# elif defined(__arm__)
+# define fallback_jmpi(i0) jmpi_p(i0,1)
+# elif defined(__s390__) || defined(__s390x__)
+# define fallback_jmpi(i0) jmpi(i0,1)
+# else
+# define fallback_jmpi(i0) jmpi(i0)
+# endif
+# if defined(__mips__)
+# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1)
+# elif defined(__s390__) || defined(__s390x__)
+# define fallback_bnei(i0,r0,i1) bnei_p(i0,r0,i1)
+# else
+# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1)
+# endif
+# if defined(__s390__) || defined(__s390x__)
+# define fallback_bmsr(i0,r0,r1) bmsr_p(i0,r0,r1)
+# else
+# define fallback_bmsr(i0,r0,r1) bmsr(i0,r0,r1)
+# endif
#endif
#if CODE
{
# if defined(__arm__)
movi(rn(_R0), i1);
-# elif defined(__ia64__)
- /* avoid confusion with pushargi patching */
- if (i1 >= -2097152 && i1 <= 2097151)
- MOVI(_jitc->rout, i1);
- else
- MOVL(_jitc->rout, i1);
# elif defined(__hppa__)
movi(_R26_REGNO, i1);
-#endif
+# endif
+# if defined(__arm__)
+ calli(i0, jit_exchange_p());
+# elif defined(__mips__)
+ calli(i0, 0);
+# elif defined(__powerpc__) && _CALL_SYSV
+ calli(i0, 0);
+# elif defined(__s390__) || defined(__s390x__)
+ calli(i0, 0);
+# else
calli(i0);
+# endif
}
static void
fallback_load(r2);
eqr(r0, r0, r2);
fallback_save(r0);
- jump = bnei(_jit->pc.w, r0, 1);
+ jump = fallback_bnei(_jit->pc.w, r0, 1);
fallback_load(r3);
# if __WORDSIZE == 32
str_i(r1, r3);
str_l(r1, r3);
# endif
/* done: */
-# if defined(__ia64__)
- sync();
-# endif
done = _jit->pc.w;
fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
fallback_load(r0);
-# if defined(__arm__)
- patch_at(arm_patch_jump, jump, done);
-# elif defined(__ia64__)
- patch_at(jit_code_bnei, jump, done);
-# else
- patch_at(jump, done);
-# endif
+ fallback_patch_at(jump, done);
fallback_load_regs(r0);
if (iscasi)
jit_unget_reg(r1_reg);
}
+
+/*
+ * Emit a generic sequence storing in r0 the number of leading one bits
+ * of r1.  The value is complemented into r0, so the answer is the number
+ * of leading zeros of r0; an all-ones input (complement == 0) is answered
+ * directly with __WORDSIZE.
+ */
+static void
+_fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t		has_zero_bit;	/* branch: ~r1 != 0 */
+    jit_word_t		finished;	/* jump over the clz sequence */
+
+    comr(r0, r1);
+    has_zero_bit = fallback_bnei(_jit->pc.w, r0, 0);
+    /* every bit of r1 is set */
+    movi(r0, __WORDSIZE);
+    finished = fallback_jmpi(_jit->pc.w);
+    /* has_zero_bit: */
+    fallback_patch_at(has_zero_bit, _jit->pc.w);
+    fallback_clz(r0, r0);
+    /* finished: */
+    fallback_patch_jmpi(finished, _jit->pc.w);
+}
+
+/*
+ * Emit a generic sequence storing in r0 the number of leading zero bits
+ * of r1 (__WORDSIZE when r1 is zero).
+ *
+ * The emitted code is a binary search: a mask covering the top half of
+ * the word is tested against a scratch copy of the value; when none of
+ * those bits is set the copy is shifted left by the mask width and that
+ * width is added to the count.  The mask is then narrowed to the top
+ * quarter, and so on down to the single top bit.
+ * Two scratch registers are used, so r0 may alias r1.
+ */
+static void
+_fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t		val_reg, val;	/* shifted copy of the input */
+    jit_int32_t		mask_reg, mask;	/* bits currently being tested */
+    jit_word_t		done;		/* jump taken for a zero input */
+    jit_word_t		skip;		/* branch over the current step */
+
+    /* zero input: the result is the word size */
+    skip = fallback_bnei(_jit->pc.w, r1, 0);
+    movi(r0, __WORDSIZE);
+    done = fallback_jmpi(_jit->pc.w);
+    fallback_patch_at(skip, _jit->pc.w);
+
+    mask_reg = jit_get_reg(jit_class_gpr);
+    mask = rn(mask_reg);
+    val_reg = jit_get_reg(jit_class_gpr);
+    val = rn(val_reg);
+    movr(val, r1);
+    movi(r0, 0);
+
+# if __WORDSIZE == 64
+    /* top 32 bits */
+    movi(mask, 0xffffffff00000000UL);
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    lshi(val, val, 32);
+    addi(r0, r0, 32);
+    fallback_patch_at(skip, _jit->pc.w);
+    lshi(mask, mask, 16);
+# else
+    movi(mask, 0xffff0000UL);
+# endif
+
+    /* top 16 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    lshi(val, val, 16);
+    addi(r0, r0, 16);
+    fallback_patch_at(skip, _jit->pc.w);
+    lshi(mask, mask, 8);
+
+    /* top 8 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    lshi(val, val, 8);
+    addi(r0, r0, 8);
+    fallback_patch_at(skip, _jit->pc.w);
+    lshi(mask, mask, 4);
+
+    /* top 4 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    lshi(val, val, 4);
+    addi(r0, r0, 4);
+    fallback_patch_at(skip, _jit->pc.w);
+    lshi(mask, mask, 2);
+
+    /* top 2 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    lshi(val, val, 2);
+    addi(r0, r0, 2);
+    fallback_patch_at(skip, _jit->pc.w);
+    lshi(mask, mask, 1);
+
+    /* top bit: nothing left to shift, only count */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    addi(r0, r0, 1);
+    fallback_patch_at(skip, _jit->pc.w);
+
+    /* done: */
+    fallback_patch_jmpi(done, _jit->pc.w);
+    jit_unget_reg(mask_reg);
+    jit_unget_reg(val_reg);
+}
+
+/*
+ * Emit a generic sequence storing in r0 the number of trailing one bits
+ * of r1.  The value is complemented into r0, so the answer is the number
+ * of trailing zeros of r0; an all-ones input (complement == 0) is
+ * answered directly with __WORDSIZE.
+ */
+static void
+_fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t		has_zero_bit;	/* branch: ~r1 != 0 */
+    jit_word_t		finished;	/* jump over the ctz sequence */
+
+    comr(r0, r1);
+    has_zero_bit = fallback_bnei(_jit->pc.w, r0, 0);
+    /* every bit of r1 is set */
+    movi(r0, __WORDSIZE);
+    finished = fallback_jmpi(_jit->pc.w);
+    /* has_zero_bit: */
+    fallback_patch_at(has_zero_bit, _jit->pc.w);
+    fallback_ctz(r0, r0);
+    /* finished: */
+    fallback_patch_jmpi(finished, _jit->pc.w);
+}
+
+/*
+ * Emit a generic sequence storing in r0 the number of trailing zero bits
+ * of r1 (__WORDSIZE when r1 is zero).
+ *
+ * Mirror image of the leading-zero search: a mask covering the low half
+ * of the word is tested against a scratch copy of the value; when none
+ * of those bits is set the copy is shifted right (unsigned) by the mask
+ * width and that width is added to the count.  The mask is then narrowed
+ * to the low quarter, and so on down to the single low bit.
+ * Two scratch registers are used, so r0 may alias r1.
+ */
+static void
+_fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t		val_reg, val;	/* shifted copy of the input */
+    jit_int32_t		mask_reg, mask;	/* bits currently being tested */
+    jit_word_t		done;		/* jump taken for a zero input */
+    jit_word_t		skip;		/* branch over the current step */
+
+    /* zero input: the result is the word size */
+    skip = fallback_bnei(_jit->pc.w, r1, 0);
+    movi(r0, __WORDSIZE);
+    done = fallback_jmpi(_jit->pc.w);
+    fallback_patch_at(skip, _jit->pc.w);
+
+    mask_reg = jit_get_reg(jit_class_gpr);
+    mask = rn(mask_reg);
+    val_reg = jit_get_reg(jit_class_gpr);
+    val = rn(val_reg);
+    movr(val, r1);
+    movi(r0, 0);
+
+# if __WORDSIZE == 64
+    /* low 32 bits */
+    movi(mask, 0xffffffffUL);
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    rshi_u(val, val, 32);
+    addi(r0, r0, 32);
+    fallback_patch_at(skip, _jit->pc.w);
+    rshi(mask, mask, 16);
+# else
+    movi(mask, 0xffffUL);
+# endif
+
+    /* low 16 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    rshi_u(val, val, 16);
+    addi(r0, r0, 16);
+    fallback_patch_at(skip, _jit->pc.w);
+    rshi(mask, mask, 8);
+
+    /* low 8 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    rshi_u(val, val, 8);
+    addi(r0, r0, 8);
+    fallback_patch_at(skip, _jit->pc.w);
+    rshi(mask, mask, 4);
+
+    /* low 4 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    rshi_u(val, val, 4);
+    addi(r0, r0, 4);
+    fallback_patch_at(skip, _jit->pc.w);
+    rshi(mask, mask, 2);
+
+    /* low 2 bits */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    rshi_u(val, val, 2);
+    addi(r0, r0, 2);
+    fallback_patch_at(skip, _jit->pc.w);
+    rshi(mask, mask, 1);
+
+    /* low bit: nothing left to shift, only count */
+    skip = fallback_bmsr(_jit->pc.w, val, mask);
+    addi(r0, r0, 1);
+    fallback_patch_at(skip, _jit->pc.w);
+
+    /* done: */
+    fallback_patch_jmpi(done, _jit->pc.w);
+    jit_unget_reg(mask_reg);
+    jit_unget_reg(val_reg);
+}
#endif
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
#define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+/* Native byte swap of the low 16 bits (us) and of the 32 bit word (ui) */
+# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
#define extr_uc(r0,r1) EXTRWR_U(r1,31,8,r0)
#define extr_s(r0,r1) EXTRWR(r1,31,16,r0)
#define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0)
-#define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1)
-#define bswapr_ui(r0,r1) generic_bswapr_ui(_jit,r0,r1)
#define addr(r0,r1,r2) ADD(r1,r2,r0)
#define addi(r0,r1,i0) _addi(_jit,r0,r1,i0)
static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
#define jmpr(r0) _jmpr(_jit,r0)
static void _jmpr(jit_state_t*,jit_int32_t);
#define jmpi(i0) _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
#define jmpi_p(i0) _jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
#define callr(r0) _callr(_jit,r0)
return (w);
}
+/*
+ * Byte swap the low 16 bits of r1 into r0 with an extract/deposit pair.
+ * The first instruction already writes r0 while the second one still
+ * reads the source, so when r0 aliases r1 the source is first copied to
+ * a scratch register.
+ */
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t		scratch;
+
+    if (r0 != r1) {
+	EXTRWR_U(r1, 23, 8, r0);
+	DEPWR(r1, 23, 8, r0);
+	return;
+    }
+    /* destination aliases the source: work from a copy */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    EXTRWR_U(rn(scratch), 23, 8, r0);
+    DEPWR(rn(scratch), 23, 8, r0);
+    jit_unget_reg(scratch);
+}
+
+/*
+ * Byte swap the 32 bit value of r1 into r0 with a shift-pair / deposit /
+ * shift-pair sequence.  The last instruction still reads the original
+ * source after r0 has been written, so when r0 aliases r1 the source is
+ * first copied to a scratch register.
+ */
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t		scratch;
+
+    if (r0 != r1) {
+	SHRPWI(r1, r1, 16, r0);
+	DEPWR(r0, 15, 8, r0);
+	SHRPWI(r1, r0, 8, r0);
+	return;
+    }
+    /* destination aliases the source: work from a copy */
+    scratch = jit_get_reg(jit_class_gpr);
+    movr(rn(scratch), r1);
+    SHRPWI(rn(scratch), rn(scratch), 16, r0);
+    DEPWR(r0, 15, 8, r0);
+    SHRPWI(rn(scratch), r0, 8, r0);
+    jit_unget_reg(scratch);
+}
+
static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
BV_N(_R0_REGNO, r0);
}
-static void
+/*
+ * Emit an unconditional jump to the absolute address i0 and return the
+ * address at which the jump was emitted.
+ * A pc relative B is used when the word displacement fits the range
+ * checked below; otherwise the ABSOLUTE target is loaded in r1 and
+ * reached with BV, which branches to the address held in the register
+ * (same form used by jmpr), so the displacement must not be loaded there.
+ */
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
- w = ((i0 - _jit->pc.w) >> 2) - 2;
- if (w >= -32768 && w <= 32767)
- B_N(w, _R0_REGNO);
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = ((i0 - w) >> 2) - 2;
+ if (d >= -32768 && d <= 32767)
+ B_N(d, _R0_REGNO);
else {
- movi(_R1_REGNO, w);
+ movi(_R1_REGNO, i0);
BV_N(_R0_REGNO, _R1_REGNO);
}
+ return (w);
}
static jit_word_t
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#define JIT_INSTR_MAX 196
0, /* data */
0, /* live */
- 0, /* align */
+ 28, /* align */
0, /* save */
0, /* load */
+ 0, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
20, /* va_arg_d */
8, /* movi */
12, /* movnr */
12, /* movzr */
+ 88, /* casr */
+ 96, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 12, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
4, /* htonr_us */
4, /* htonr_ui */
0, /* htonr_ul */
40, /* callr */
44, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 28, /* bswapr_us */
- 68, /* bswapr_ui */
- 0, /* bswapr_ul */
- 88, /* casr */
- 96, /* casi */
+ 160, /* clo */
+ 140, /* clz */
+ 164, /* cto */
+ 144, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
+ jit_code_inc_synth_w(code, u);
jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- assert(u->code == jit_code_arg ||
+ assert((u->code >= jit_code_arg_c && u->code <= jit_code_arg) ||
u->code == jit_code_arg_f || u->code == jit_code_arg_d);
return (jit_arg_reg_p(u->u.w));
}
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
_jitc->function->self.size -= sizeof(jit_word_t);
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else
offset = _jitc->function->self.size;
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (v->u.w >= 0)
jit_extr_c(u, _R26 - v->u.w);
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (v->u.w >= 0)
jit_extr_uc(u, _R26 - v->u.w);
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (v->u.w >= 0)
jit_extr_s(u, _R26 - v->u.w);
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (v->u.w >= 0)
jit_extr_us(u, _R26 - v->u.w);
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (v->u.w >= 0)
jit_movr(u, _R26 - v->u.w);
}
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (v->u.w >= 0)
jit_movr(_R26 - v->u.w, u);
else
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (v->u.w >= 0)
jit_movi(_R26 - v->u.w, u);
else {
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
_jitc->function->call.size -= sizeof(jit_word_t);
if (jit_arg_reg_p(_jitc->function->call.argi)) {
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
_jitc->function->call.size -= sizeof(jit_word_t);
if (jit_arg_reg_p(_jitc->function->call.argi)) {
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
break;
case_rr(neg,);
case_rr(com,);
+#define clor(r0, r1) fallback_clo(r0, r1)
+#define clzr(r0, r1) fallback_clz(r0, r1)
+#define ctor(r0, r1) fallback_cto(r0, r1)
+#define ctzr(r0, r1) fallback_ctz(r0, r1)
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rr(ext, _c);
case_rr(ext, _uc);
case_rr(ext, _s);
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (word >= -32768 && word <= 32767)
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo.
+ * Note that for hppa use '-' instead of '+' as hppa
+ * stack grows up */
+ undo.func.self.aoff = _jitc->function->frame -
+ _jitc->function->self.aoff;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
case jit_code_va_arg_d:
vaarg_d(rn(node->u.w), rn(node->v.w));
break;
- case jit_code_live:
- case jit_code_arg: case jit_code_ellipsis:
+ case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_s: case jit_code_getarg_us:
case jit_code_getarg_i:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define nei(r0,r1,i0) _nei(_jit,r0,r1,i0)
static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Bit reversal helper and the leading/trailing ones/zeros counters */
+#define bitswap(r0, r1) _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
+#define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
#define negr(r0,r1) subr(r0,0,r1)
#define comr(r0,r1) ANDCMI(r0,-1,r1)
#define movr(r0,r1) _movr(_jit,r0,r1)
#define jmpr(r0) _jmpr(_jit,r0)
static void _jmpr(jit_state_t*,jit_int32_t);
#define jmpi(i0) _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
#define jmpi_p(i0) _jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
#define callr(r0) _callr(_jit,r0)
TSTREG1(r3);
TSTPRED(_p);
TSTREG1(r1);
- inst((7L<<37)|(1L<<34)|(1L<<34)|(1L<<33)|
+ inst((7L<<37)|(1L<<34)|(1L<<33)|
(x2<<30)|(1L<<28)|(r3<<20)|(r1<<6)|_p, INST_I);
SETREG(r1);
}
assert(i0 == 0);
}
+/*
+ * Emit code storing in r0 the value of r1 with its bit order reversed.
+ * The value is copied to r0 and then groups of increasing size are
+ * exchanged in place: adjacent bits, bit pairs, nibbles, bytes, 16 bit
+ * halves and finally the two 32 bit halves.  Each step computes
+ *	v = ((v >> n) & mask) | ((v & mask) << n)
+ * using three scratch registers (mask, right part, left part).
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t		t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    /* swap adjacent bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);		/* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));	/* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));		/* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 1);		/* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));		/* v = t1 | t2 */
+    /* swap bit pairs */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);		/* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));	/* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));		/* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 2);		/* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));		/* v = t1 | t2 */
+    /* swap nibbles */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);		/* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));	/* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));		/* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 4);		/* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));		/* v = t1 | t2 */
+    /* swap bytes */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);		/* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));	/* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));		/* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 8);		/* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));		/* v = t1 | t2 */
+    /* swap 16 bit halves */
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);		/* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));	/* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));		/* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 16);		/* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));		/* v = t1 | t2 */
+    /* swap 32 bit halves; no mask is needed for a full half word shift */
+    rshi_u(rn(t1), r0, 32);		/* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);		/* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));		/* v = t1 | t2 */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Count leading zeros of r1 into r0: a single CLZ when jit_cpu.clz is
+ * set, the generic fallback sequence otherwise.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+	fallback_clz(r0, r1);
+	return;
+    }
+    CLZ(r0, r1);
+}
+
+/*
+ * Count leading ones of r1 into r0.  With jit_cpu.clz set this is the
+ * leading zero count of the complemented value; otherwise the generic
+ * fallback sequence is emitted.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+	fallback_clo(r0, r1);
+	return;
+    }
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/*
+ * Count trailing ones of r1 into r0.  With jit_cpu.clz set the bits are
+ * reversed and the leading ones of the result are counted; otherwise the
+ * generic fallback sequence is emitted.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+	fallback_cto(r0, r1);
+	return;
+    }
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+/*
+ * Count trailing zeros of r1 into r0.  With jit_cpu.clz set the bits are
+ * reversed and the leading zeros of the result are counted; otherwise
+ * the generic fallback sequence is emitted.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+	fallback_ctz(r0, r1);
+	return;
+    }
+    bitswap(r0, r1);
+    clzr(r0, r0);
+}
+
static void
_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
BR(BR_6);
}
-static void
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t d;
+ jit_word_t d, w;
sync();
- d = ((jit_word_t)i0 - _jit->pc.w) >> 4;
+ w = _jit->pc.w;
+ d = ((jit_word_t)i0 - w) >> 4;
if (d >= -16777216 && d <= 16777215)
BRI(d);
else
BRL(d);
+ return (w);
}
static jit_word_t
i1 = (ic >> 61) & 0x1L;
i41 = (ic >> 22) & 0x1ffffffffffL;
i20 = ic & 0xfffffL;
- assert((tm & ~1) == TM_M_L_X_ &&
+ if (!((tm & ~1) == TM_M_L_X_ &&
(s2 & 0xfL<<37) == (0xcL<<37) &&
- s0 == nop_m);
+ s0 == nop_m))
+ goto short_jump;
s1 = i41;
s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12);
s2 |= (i1<<36)|(i20<<13);
break;
default:
+ short_jump:
/* Only B1 in slot 0 expected due to need to either
* a stop to update predicates, or a sync before
* unconditional short branch */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 224
+#define JIT_INSTR_MAX 608
0, /* data */
0, /* live */
- 0, /* align */
+ 48, /* align */
0, /* save */
0, /* load */
+ 16, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
16, /* va_start */
32, /* va_arg */
32, /* va_arg_d */
16, /* movi */
16, /* movnr */
16, /* movzr */
+ 48, /* casr */
+ 64, /* casi */
16, /* extr_c */
16, /* extr_uc */
16, /* extr_s */
16, /* extr_us */
16, /* extr_i */
16, /* extr_ui */
+ 32, /* bswapr_us */
+ 32, /* bswapr_ui */
+ 16, /* bswapr_ul */
32, /* htonr_us */
32, /* htonr_ui */
16, /* htonr_ul */
32, /* callr */
48, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
16, /* movr_d_w */
32, /* movi_d_w */
- 32, /* bswapr_us */
- 32, /* bswapr_ui */
- 16, /* bswapr_ul */
- 48, /* casr */
- 64, /* casi */
+ 608, /* clo */
+ 544, /* clz */
+ 608, /* cto */
+ 544, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
* Initialization
*/
+jit_cpu_t jit_cpu;
jit_register_t _rvs[] = {
/* Always 0 */
{ 0, "r0" },
void
jit_get_cpu(void)
{
+ jit_word_t clz = -1;
+ __asm__ volatile("tf.nz.unc p6,p7=32;(p6)mov %0=1;(p7)mov %0=0"
+ : "=r" (clz));
+ assert(clz == 0 || clz == 1);
+ jit_cpu.clz = clz;
}
void
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
+ jit_code_inc_synth_w(code, u);
jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- assert(u->code == jit_code_arg ||
- u->code == jit_code_arg_f || u->code == jit_code_arg_d);
- return (jit_arg_reg_p(u->u.w));
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
+ return (jit_arg_reg_p(u->u.w));
+ assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+ return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
}
void
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, _R32 + v->u.w);
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, _R32 + v->u.w);
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, _R32 + v->u.w);
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, _R32 + v->u.w);
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_i(u, _R32 + v->u.w);
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, _R32 + v->u.w);
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, _R32 + v->u.w);
}
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(_R32 + v->u.w, u);
else
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(_R32 + v->u.w, u);
else {
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(_OUT0 + _jitc->function->call.argi, u);
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(_OUT0 + _jitc->function->call.argi, u);
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if (node->u.w > 8)
nop(node->u.w - 8);
break;
+ case jit_code_skip:
+ sync();
+ nop((node->u.w + 7) & ~7);
+ break;
case jit_code_note: case jit_code_name:
sync();
node->u.w = _jit->pc.w;
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case jit_code_casr:
casr(rn(node->u.w), rn(node->v.w),
rn(node->w.q.l), rn(node->w.q.h));
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (word >= -16777216 && word <= 16777215)
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
case jit_code_va_arg_d:
vaarg_d(rn(node->u.w), rn(node->v.w));
break;
- case jit_code_live:
- case jit_code_arg: case jit_code_ellipsis:
+ case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i: case jit_code_arg_l:
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_i: case jit_code_getarg_ui:
case jit_code_getarg_l:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
/*
- * Copyright (C) 2022 Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define _RA_REGNO 1
# define _SP_REGNO 3
# define _FP_REGNO 22
-# define stack_framesize 160
# define ldr(u, v) ldr_l(u, v)
# define ldi(u, v) ldi_l(u, v)
# define ldxi(u, v, w) ldxi_l(u, v, w)
# define nop(i0) _nop(_jit, i0)
# define comr(r0, r1) NOR(r0, r1, r1)
# define negr(r0, r1) subr(r0, _ZERO_REGNO, r1)
+# define clor(r0, r1) CLO_D(r0, r1)
+# define clzr(r0, r1) CLZ_D(r0, r1)
+# define ctor(r0, r1) CTO_D(r0, r1)
+# define ctzr(r0, r1) CTZ_D(r0, r1)
static void _nop(jit_state_t*,jit_int32_t);
# define movr(r0, r1) _movr(_jit, r0, r1)
static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
# define jmpr(r0) JIRL(_ZERO_REGNO, r0, 0)
# define jmpi(i0) _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
# define jmpi_p(i0) _jmpi_p(_jit, i0)
static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
# define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1)
static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
# define callr(r0) JIRL(_RA_REGNO, r0, 0)
# define calli(i0) _calli(_jit, i0)
-static void _calli(jit_state_t*, jit_word_t);
+static jit_word_t _calli(jit_state_t*, jit_word_t);
# define calli_p(i0) _calli_p(_jit, i0)
static jit_word_t _calli_p(jit_state_t*, jit_word_t);
# define prolog(i0) _prolog(_jit, i0)
return (w);
}
-static void
+/* Emit an unconditional jump to address i0.
+ *
+ * The short form (a single B) is used when the displacement from the
+ * current pc, counted in 4-byte instructions, fits the signed 26-bit
+ * field tested by can_sign_extend_si26_p(); otherwise the long form is
+ * emitted through jmpi_p().
+ *
+ * Returns the address of the emitted jump (the pc before emission for
+ * the short form, or whatever jmpi_p() returns), which callers hand to
+ * patch(). */
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
- w = (i0 - _jit->pc.w) >> 2;
- if (can_sign_extend_si26_p(i0))
- B(w);
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 2;
+ /* The value encoded by B is the displacement d, so the range test
+ * must be made on d and not on the absolute target i0. */
+ if (can_sign_extend_si26_p(d))
+ B(d);
else
- (void)jmpi_p(i0);
+ w = jmpi_p(i0);
+ return (w);
}
static jit_word_t
return (w);
}
-static void
+/* Emit a call to address i0.
+ *
+ * The short form (a single BL) is used when the displacement from the
+ * current pc, counted in 4-byte instructions, fits the signed 26-bit
+ * field tested by can_sign_extend_si26_p(); otherwise the long form is
+ * emitted through calli_p().
+ *
+ * Returns the address of the emitted call (the pc before emission for
+ * the short form, or whatever calli_p() returns), which callers hand to
+ * patch(). */
+static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
- w = (i0 - _jit->pc.w) >> 2;
- if (can_sign_extend_si26_p(i0))
- BL(w);
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 2;
+ /* The value encoded by BL is the displacement d, so the range test
+ * must be made on d and not on the absolute target i0. */
+ if (can_sign_extend_si26_p(d))
+ BL(d);
else
- (void)calli_p(i0);
+ w = calli_p(i0);
+ return (w);
}
static jit_word_t
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
_jitc->function->stack = ((_jitc->function->self.alen -
/* align stack at 16 bytes */
_jitc->function->self.aoff) + 15) & -16;
- subi(_SP_REGNO, _SP_REGNO, stack_framesize);
- stxi(0, _SP_REGNO, _RA_REGNO);
- stxi(8, _SP_REGNO, _FP_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _S0))
- stxi(16, _SP_REGNO, rn(_S0));
- if (jit_regset_tstbit(&_jitc->function->regset, _S1))
- stxi(24, _SP_REGNO, rn(_S1));
- if (jit_regset_tstbit(&_jitc->function->regset, _S2))
- stxi(32, _SP_REGNO, rn(_S2));
- if (jit_regset_tstbit(&_jitc->function->regset, _S3))
- stxi(40, _SP_REGNO, rn(_S3));
- if (jit_regset_tstbit(&_jitc->function->regset, _S4))
- stxi(48, _SP_REGNO, rn(_S4));
- if (jit_regset_tstbit(&_jitc->function->regset, _S5))
- stxi(56, _SP_REGNO, rn(_S5));
- if (jit_regset_tstbit(&_jitc->function->regset, _S6))
- stxi(64, _SP_REGNO, rn(_S6));
- if (jit_regset_tstbit(&_jitc->function->regset, _S7))
- stxi(72, _SP_REGNO, rn(_S7));
- if (jit_regset_tstbit(&_jitc->function->regset, _S8))
- stxi(80, _SP_REGNO, rn(_S8));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
- stxi_d(88, _SP_REGNO, rn(_FS0));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
- stxi_d(96, _SP_REGNO, rn(_FS1));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
- stxi_d(104, _SP_REGNO, rn(_FS2));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
- stxi_d(112, _SP_REGNO, rn(_FS3));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
- stxi_d(120, _SP_REGNO, rn(_FS4));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
- stxi_d(128, _SP_REGNO, rn(_FS5));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
- stxi_d(136, _SP_REGNO, rn(_FS6));
- if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
- stxi_d(144, _SP_REGNO, rn(_FS7));
- movr(_FP_REGNO, _SP_REGNO);
+
+ if (_jitc->function->stack)
+ _jitc->function->need_stack = 1;
+ if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ _jitc->function->need_stack = 1;
+ break;
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+ if (_jitc->function->need_frame) {
+ stxi(0, _SP_REGNO, _RA_REGNO);
+ stxi(8, _SP_REGNO, _FP_REGNO);
+ }
+ /* callee save registers */
+ for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ stxi(offs, _SP_REGNO, rn(iregs[reg]));
+ offs += sizeof(jit_word_t);
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame)
+ movr(_FP_REGNO, _SP_REGNO);
if (_jitc->function->stack)
subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
}
if (_jitc->function->self.call & jit_call_varargs) {
for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
- stxi(stack_framesize - ((8 - reg) * 8),
+ stxi(jit_framesize() - ((8 - reg) * 8),
_FP_REGNO, rn(JIT_RA0 - reg));
}
}
+/* Emit the function epilogue.
+ *
+ * Nothing is emitted when the function was declared with assume_frame.
+ * Otherwise: when a frame was set up (need_frame) the stack pointer is
+ * reset from the frame pointer and ra/fp are reloaded from offsets 0 and
+ * 8; every callee-save register marked in the function regset is then
+ * reloaded from consecutive slots starting at offset 16 (iregs first,
+ * then fregs, the same order and offsets used by the save loops in
+ * _prolog); the frame is released when need_frame or need_stack is set;
+ * finally JIRL jumps to the return address.
+ * The node argument is not used here. */
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, offs;
if (_jitc->function->assume_frame)
return;
- movr(_SP_REGNO, _FP_REGNO);
- ldxi(_RA_REGNO, _SP_REGNO, 0);
- ldxi(_FP_REGNO, _SP_REGNO, 8);
- if (jit_regset_tstbit(&_jitc->function->regset, _S0))
- ldxi(rn(_S0), _SP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _S1))
- ldxi(rn(_S1), _SP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _S2))
- ldxi(rn(_S2), _SP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _S3))
- ldxi(rn(_S3), _SP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _S4))
- ldxi(rn(_S4), _SP_REGNO, 48);
- if (jit_regset_tstbit(&_jitc->function->regset, _S5))
- ldxi(rn(_S5), _SP_REGNO, 56);
- if (jit_regset_tstbit(&_jitc->function->regset, _S6))
- ldxi(rn(_S6), _SP_REGNO, 64);
- if (jit_regset_tstbit(&_jitc->function->regset, _S7))
- ldxi(rn(_S7), _SP_REGNO, 72);
- if (jit_regset_tstbit(&_jitc->function->regset, _S8))
- ldxi(rn(_S8), _SP_REGNO, 80);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
- ldxi_d(rn(_FS0), _SP_REGNO, 88);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
- ldxi_d(rn(_FS1), _SP_REGNO, 96);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
- ldxi_d(rn(_FS2), _SP_REGNO, 104);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
- ldxi_d(rn(_FS3), _SP_REGNO, 112);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
- ldxi_d(rn(_FS4), _SP_REGNO, 120);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
- ldxi_d(rn(_FS5), _SP_REGNO, 128);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
- ldxi_d(rn(_FS6), _SP_REGNO, 136);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
- ldxi_d(rn(_FS7), _SP_REGNO, 144);
- addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+ if (_jitc->function->need_frame) {
+ movr(_SP_REGNO, _FP_REGNO);
+ ldxi(_RA_REGNO, _SP_REGNO, 0);
+ ldxi(_FP_REGNO, _SP_REGNO, 8);
+ }
+
+ /* callee save registers */
+ for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_word_t);
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ addi(_SP_REGNO, _SP_REGNO, jit_framesize());
JIRL(_ZERO_REGNO, _RA_REGNO, 0);
}
assert(_jitc->function->self.call & jit_call_varargs);
/* Initialize va_list to the first stack argument. */
if (jit_arg_reg_p(_jitc->function->vagp))
- addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+ addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
else
- addi(r0, _FP_REGNO, _jitc->function->self.size);
+ addi(r0, _FP_REGNO, jit_selfsize());
}
static void
/*
- * Copyright (C) 2022 Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
28, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
8, /* va_arg_d */
16, /* movi */
12, /* movnr */
12, /* movzr */
+ 32, /* casr */
+ 44, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
4, /* extr_i */
4, /* extr_ui */
+ 8, /* bswapr_us */
+ 8, /* bswapr_ui */
+ 4, /* bswapr_ul */
8, /* htonr_us */
8, /* htonr_ui */
4, /* htonr_ul */
4, /* ldr_c */
- 16, /* ldi_c */
+ 20, /* ldi_c */
4, /* ldr_uc */
- 16, /* ldi_uc */
+ 20, /* ldi_uc */
4, /* ldr_s */
- 16, /* ldi_s */
+ 20, /* ldi_s */
4, /* ldr_us */
- 16, /* ldi_us */
+ 20, /* ldi_us */
4, /* ldr_i */
- 16, /* ldi_i */
+ 20, /* ldi_i */
4, /* ldr_ui */
- 16, /* ldi_ui */
+ 20, /* ldi_ui */
4, /* ldr_l */
- 16, /* ldi_l */
+ 20, /* ldi_l */
4, /* ldxr_c */
16, /* ldxi_c */
4, /* ldxr_uc */
4, /* ldxr_l */
16, /* ldxi_l */
4, /* str_c */
- 16, /* sti_c */
+ 20, /* sti_c */
4, /* str_s */
- 16, /* sti_s */
+ 20, /* sti_s */
4, /* str_i */
- 16, /* sti_i */
+ 20, /* sti_i */
4, /* str_l */
- 16, /* sti_l */
+ 20, /* sti_l */
4, /* stxr_c */
16, /* stxi_c */
4, /* stxr_s */
4, /* callr */
20, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
4, /* movr_f */
8, /* movi_f */
4, /* ldr_f */
- 16, /* ldi_f */
+ 20, /* ldi_f */
4, /* ldxr_f */
16, /* ldxi_f */
4, /* str_f */
- 16, /* sti_f */
+ 20, /* sti_f */
4, /* stxr_f */
16, /* stxi_f */
8, /* bltr_f */
4, /* movr_d */
16, /* movi_d */
4, /* ldr_d */
- 16, /* ldi_d */
+ 20, /* ldi_d */
4, /* ldxr_d */
16, /* ldxi_d */
4, /* str_d */
- 16, /* sti_d */
+ 20, /* sti_d */
4, /* stxr_d */
16, /* stxi_d */
8, /* bltr_d */
0, /* movi_d_ww */
4, /* movr_d_w */
12, /* movi_d_w */
- 8, /* bswapr_us */
- 8, /* bswapr_ui */
- 4, /* bswapr_ul */
- 32, /* casr */
- 44, /* casi */
+ 4, /* clo */
+ 4, /* clz */
+ 4, /* cto */
+ 4, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2022 Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* Paulo Cesar Pereira de Andrade
*/
+/* callee save + variadic arguments
+ * align16(ra+fp+s[0-8]+fs[0-7]) + align16(a[0-7]) */
+#define stack_framesize (144 + 64)
+
#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8)
#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8)
/*
* Prototypes
*/
+#define compute_framesize() _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
#define patch(instr, node) _patch(_jit, instr, node)
static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
{ _NOREG, "<none>" },
};
+static jit_int32_t iregs[] = {
+ _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8
+};
+
+static jit_int32_t fregs[] = {
+ _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7
+};
+
/*
* Implementation
*/
_jit_allocai(jit_state_t *_jit, jit_int32_t length)
{
assert(_jitc->function);
+ jit_check_frame();
switch (length) {
case 0: case 1: break;
case 2: _jitc->function->self.aoff &= -2; break;
}
+/* Synthesize a "return register" operation: the value in register u is
+ * moved to JIT_RET and a ret is emitted, bracketed by
+ * jit_code_inc_synth_w()/jit_dec_synth().  The node code to record is
+ * supplied by the caller in `code` (one of the typed retr_* codes,
+ * presumably -- confirm against the public jit_retr_* macros). */
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u);
+ jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
+/* Tell whether the argument described by node u is passed in a register.
+ *
+ * Integer arguments (codes jit_code_arg_c .. jit_code_arg) are in a
+ * register when their index satisfies jit_arg_reg_p().  Float/double
+ * arguments are in a register either when their index satisfies
+ * jit_arg_f_reg_p(), or when index - 8 satisfies jit_arg_reg_p(), i.e.
+ * the value travels in an integer argument register. */
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
- return (jit_arg_f_reg_p(u->u.w));
+ return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
}
void
_jit_ellipsis(jit_state_t *_jit)
{
jit_inc_synth(ellipsis);
+ jit_check_frame();
if (_jitc->prepare) {
jit_link_prepare();
assert(!(_jitc->function->call.call & jit_call_varargs));
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
+ jit_check_frame();
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_f, offset,
++_jitc->function->self.argn);
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_d, offset,
++_jitc->function->self.argn);
+/* Synthesize getarg_c: fetch argument v into register u.
+ * A register-passed argument is converted with jit_extr_c() from
+ * _A0 - v->u.w; a stack-passed one is loaded with jit_ldxi_c() at offset
+ * v->u.w from JIT_FP and the load node is given to jit_link_alist()
+ * (presumably so the offset can be fixed up by patch_alist() once the
+ * frame size is known -- confirm). */
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, _A0 - v->u.w);
- else
- jit_ldxi_c(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize getarg_uc: fetch argument v into register u.
+ * A register-passed argument is converted with jit_extr_uc() from
+ * _A0 - v->u.w; a stack-passed one is loaded with jit_ldxi_uc() at offset
+ * v->u.w from JIT_FP and the load node is given to jit_link_alist()
+ * (presumably for later offset fix-up by patch_alist() -- confirm). */
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, _A0 - v->u.w);
- else
- jit_ldxi_uc(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize getarg_s: fetch argument v into register u.
+ * A register-passed argument is converted with jit_extr_s() from
+ * _A0 - v->u.w; a stack-passed one is loaded with jit_ldxi_s() at offset
+ * v->u.w from JIT_FP and the load node is given to jit_link_alist()
+ * (presumably for later offset fix-up by patch_alist() -- confirm). */
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, _A0 - v->u.w);
- else
- jit_ldxi_s(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize getarg_us: fetch argument v into register u.
+ * A register-passed argument is converted with jit_extr_us() from
+ * _A0 - v->u.w; a stack-passed one is loaded with jit_ldxi_us() at offset
+ * v->u.w from JIT_FP and the load node is given to jit_link_alist()
+ * (presumably for later offset fix-up by patch_alist() -- confirm). */
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, _A0 - v->u.w);
- else
- jit_ldxi_us(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize getarg_i: fetch argument v into register u.
+ * A register-passed argument is converted with jit_extr_i() from
+ * _A0 - v->u.w; a stack-passed one is loaded with jit_ldxi_i() at offset
+ * v->u.w from JIT_FP and the load node is given to jit_link_alist()
+ * (presumably for later offset fix-up by patch_alist() -- confirm). */
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_i(u, _A0 - v->u.w);
- else
- jit_ldxi_i(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize getarg_ui: fetch argument v into register u.
+ * A register-passed argument is converted with jit_extr_ui() from
+ * _A0 - v->u.w; a stack-passed one is loaded with jit_ldxi_ui() at offset
+ * v->u.w from JIT_FP and the load node is given to jit_link_alist()
+ * (presumably for later offset fix-up by patch_alist() -- confirm). */
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, _A0 - v->u.w);
- else
- jit_ldxi_ui(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize getarg_l: fetch argument v into register u.
+ * A register-passed argument is copied with jit_movr() from
+ * _A0 - v->u.w (no width conversion); a stack-passed one is loaded with
+ * jit_ldxi_l() at offset v->u.w from JIT_FP and the load node is given to
+ * jit_link_alist() (presumably for later offset fix-up by patch_alist()
+ * -- confirm). */
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, _A0 - v->u.w);
- else
- jit_ldxi_l(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
+/* Synthesize a typed putargr: overwrite incoming argument v with the
+ * value of register u.  `code` is the node code to record and is checked
+ * against the argument's own code by assert_putarg_type().
+ * A register-passed argument is written with jit_movr() to
+ * _A0 - v->u.w; a stack-passed one is stored with jit_stxi() at offset
+ * v->u.w from JIT_FP and the store node is given to jit_link_alist()
+ * (presumably for later offset fix-up by patch_alist() -- confirm). */
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(_A0 - v->u.w, u);
- else
- jit_stxi(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(_A0 - v->u.w, u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(v->u.w, JIT_FP, regno);
+ node = jit_stxi(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_movr_f(u, _FA0 - v->u.w);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
- else
- jit_ldxi_f(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_movr_f(_FA0 - v->u.w, u);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
- else
- jit_stxi_f(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_inc_synth_fp(putargi_f, u, v);
if (jit_arg_f_reg_p(v->u.w))
jit_movi_f(_FA0 - v->u.w, u);
- else if (jit_arg_reg_p(v->u.w - 8)) {
- union {
- jit_float32_t f;
- jit_int32_t i;
- } uu;
- uu.f = u;
- jit_movi(JIT_RA0 - (v->u.w - 8), uu.i);
- }
+ else if (jit_arg_reg_p(v->u.w - 8))
+ jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(v->u.w, JIT_FP, regno);
+ node = jit_stxi_f(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_movr_d(u, _FA0 - v->u.w);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
- else
- jit_ldxi_d(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_movr_d(_FA0 - v->u.w, u);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
- else
- jit_stxi_d(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_inc_synth_dp(putargi_d, u, v);
if (jit_arg_f_reg_p(v->u.w))
jit_movi_d(_FA0 - v->u.w, u);
- else if (jit_arg_reg_p(v->u.w - 8)) {
- union {
- jit_float64_t d;
- jit_int64_t w;
- } uu;
- uu.d = u;
- jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
- }
+ else if (jit_arg_reg_p(v->u.w - 8))
+ jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(v->u.w, JIT_FP, regno);
+ node = jit_stxi_d(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(_A0 - _jitc->function->call.argi, u);
else {
jit_stxi(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(_A0 - _jitc->function->call.argi, u);
jit_stxi(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
else {
jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
else {
jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
{
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishr, r0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
{
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishi, (jit_word_t)i0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
case_brr(bunord, _d);
case_brd(bunord);
case jit_code_jmpr:
+ jit_check_frame();
jmpr(rn(node->u.w));
break;
case jit_code_jmpi:
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (can_sign_extend_si26_p(word))
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
jmpi(node->u.w);
+ }
break;
case jit_code_callr:
+ jit_check_frame();
callr(rn(node->u.w));
break;
case jit_code_calli:
if (temp->flag & jit_flag_patch)
calli(temp->u.w);
else {
- word = calli_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (can_sign_extend_si26_p(word))
+ word = calli(_jit->pc.w);
+ else
+ word = calli_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
calli(node->u.w);
+ }
break;
case jit_code_prolog:
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
undo.patch_offset = _jitc->patches.offset;
restart_function:
+ compute_framesize();
+ patch_alist(0);
_jitc->again = 0;
prolog(node);
break;
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ undo.func.need_frame = _jitc->function->need_frame;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ /* this will be recomputed but undo anyway to have it
+ * better self documented.*/
+ undo.func.need_stack = _jitc->function->need_stack;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
_jitc->patches.offset = undo.patch_offset;
+ patch_alist(1);
goto restart_function;
}
if (node->link && (word = _jit->pc.w & 3))
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i: case jit_code_arg_l:
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_i:
case jit_code_getarg_ui: case jit_code_getarg_l:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
stxi_d(i0, rn(r0), rn(r1));
}
+/* Compute _jitc->framesize for the current function (_jitc->function).
+ *
+ * The size is 16 bytes for ra+fp, plus one slot for each callee-save
+ * integer (iregs) and float (fregs) register marked in the function
+ * regset, plus the save area for the remaining argument registers when
+ * the function is variadic, rounded up to a multiple of 16. */
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+ jit_int32_t reg;
+ _jitc->framesize = 16; /* ra+fp */
+ /* one word per callee-save integer register in use */
+ for (reg = 0; reg < jit_size(iregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+ _jitc->framesize += sizeof(jit_word_t);
+
+ /* one double per callee-save float register in use */
+ for (reg = 0; reg < jit_size(fregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+ _jitc->framesize += sizeof(jit_float64_t);
+
+ /* Space to store variadic arguments */
+ if (_jitc->function->self.call & jit_call_varargs)
+ _jitc->framesize += (8 - _jitc->function->vagp) * 8;
+
+ /* Make sure functions called have a 16 byte aligned stack */
+ _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
static void
_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
{
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
struct { jit_uint32_t _:16; jit_uint32_t b : 5; } ft;
struct { jit_uint32_t _:11; jit_uint32_t b : 5; } rd;
struct { jit_uint32_t _:11; jit_uint32_t b : 5; } fs;
+ struct { jit_uint32_t _: 7; jit_uint32_t b : 9; } i9;
struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } ic;
struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } fd;
- struct { jit_uint32_t _: 6; jit_uint32_t b : 10; } tr;
- struct { jit_uint32_t _: 6; jit_uint32_t b : 20; } br;
struct { jit_uint32_t b : 6; } tc;
+ struct { jit_uint32_t b : 5; } cn;
struct { jit_uint32_t b : 11; } cc;
struct { jit_uint32_t b : 16; } is;
struct { jit_uint32_t b : 26; } ii;
struct { jit_uint32_t _:11; jit_uint32_t b : 5; } ft;
struct { jit_uint32_t _:16; jit_uint32_t b : 5; } rd;
struct { jit_uint32_t _:16; jit_uint32_t b : 5; } fs;
+ struct { jit_uint32_t _:16; jit_uint32_t b : 9; } i9;
struct { jit_uint32_t _:21; jit_uint32_t b : 5; } ic;
struct { jit_uint32_t _:21; jit_uint32_t b : 5; } fd;
- struct { jit_uint32_t _:21; jit_uint32_t b : 10; } tr;
- struct { jit_uint32_t _:21; jit_uint32_t b : 20; } br;
struct { jit_uint32_t _:26; jit_uint32_t b : 6; } tc;
+ struct { jit_uint32_t _:27; jit_uint32_t b : 5; } cn;
struct { jit_uint32_t _:21; jit_uint32_t b : 11; } cc;
struct { jit_uint32_t _:16; jit_uint32_t b : 16; } is;
struct { jit_uint32_t _: 6; jit_uint32_t b : 26; } ii;
#endif
int op;
} jit_instr_t;
-#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
-# define jit_mips2_p() 1
-#else
-# define jit_mips2_p() 0
-#endif
+#define jit_mips2_p() (jit_cpu.release >= 2)
+#define jit_mips6_p() (jit_cpu.release >= 6)
# define _ZERO_REGNO 0
# define _T0_REGNO 0x08
# define _T1_REGNO 0x09
# define _F28_REGNO 28
# define _F30_REGNO 30
# if __WORDSIZE == 32
-# if NEW_ABI
-# define stack_framesize 144
-# else
-# define stack_framesize 112
-# endif
# define ldr(u,v) ldr_i(u,v)
# define ldi(u,v) ldi_i(u,v)
# define ldxi(u,v,w) ldxi_i(u,v,w)
# define sti(u,v) sti_i(u,v)
# define stxi(u,v,w) stxi_i(u,v,w)
# else
-# define stack_framesize 144
# define ldr(u,v) ldr_l(u,v)
# define ldi(u,v) ldi_l(u,v)
# define ldxi(u,v,w) ldxi_l(u,v,w)
# define sti(u,v) sti_l(u,v)
# define stxi(u,v,w) stxi_l(u,v,w)
# endif
+/* can_relative_jump_p(im) => can_sign_extend_short_p(im << 2) */
+# define can_relative_jump_p(im) ((im) >= -130712 && (im) <= 131068)
# define can_sign_extend_short_p(im) ((im) >= -32678 && (im) <= 32767)
# define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535)
# define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0)
# define MIPS_CT 0x06
# define MIPS_MTH 0x07
# define MIPS_BC 0x08
+# define MIPS_BC1EQZ 0x09 /* release 6 */
+# define MIPS_BC1NEZ 0x0d /* release 6 */
# define MIPS_WRPGPR 0x0e
# define MIPS_BGZAL 0x11
# define MIPS_MFMC0 0x11
# define MIPS_DSRA32 0x3f
# define MIPS_SDBPP 0x3f
# define ii(i) *_jit->pc.ui++ = i
+# define instr(op) _instr(_jit, op)
+static void _instr(jit_state_t*, jit_int32_t);
+# define flush() _flush(_jit)
+static void _flush(jit_state_t*);
+# define pending() _pending(_jit)
+static jit_int32_t _pending(jit_state_t*);
+# define delay(op) _delay(_jit,op)
+static void _delay(jit_state_t*,jit_int32_t);
+# define jit_get_reg_for_delay_slot(mask, r0,r1) \
+ _jit_get_reg_for_delay_slot(_jit,mask,r0,r1)
+static jit_int32_t _jit_get_reg_for_delay_slot(jit_state_t*,jit_int32_t,
+ jit_int32_t, jit_int32_t);
+# define hrrrit(hc,rs,rt,rd,im,tc) _hrrrit(_jit,hc,rs,rt,rd,im,tc)
static void
_hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
jit_int32_t,jit_int32_t);
-# define hrrrit(hc,rs,rt,rd,im,tc) _hrrrit(_jit,hc,rs,rt,rd,im,tc)
# define hrrr_t(hc,rs,rt,rd,tc) hrrrit(hc,rs,rt,rd,0,tc)
# define rrr_t(rs,rt,rd,tc) hrrr_t(0,rs,rt,rd,tc)
# define hrri(hc,rs,rt,im) _hrri(_jit,hc,rs,rt,im)
static void _hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+# define hrri9(hc,rs,rt,i9,tc) _hrri9(_jit,hc,rs,rt,i9,tc)
+static void _hrri9(jit_state_t*,jit_int32_t,jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_int32_t);
# define hi(hc,im) _hi(_jit,hc,im)
static void _hi(jit_state_t*,jit_int32_t,jit_int32_t);
-# define NOP(i0) ii(0)
+# define NOP(i0) instr(0)
# define nop(i0) _nop(_jit,i0)
static void _nop(jit_state_t*,jit_int32_t);
# define h_ri(hc,rt,im) _hrri(_jit,hc,0,rt,im)
# define DSUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_DSUBU)
# define MUL(rd,rs,rt) hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL)
# define MULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
+# define MUL_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 24)
+# define MUH_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 24)
# define MULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
+# define MULU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 25)
+# define MUHU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 25)
# define DMULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
+# define DMUL_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 28)
+# define DMUH_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 28)
# define DMULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU)
+# define DMULU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 29)
+# define DMUHU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 29)
# define DIV(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV)
+# define DIV_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 26)
+# define MOD_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 26)
# define DIVU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU)
+# define DIVU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 27)
+# define MODU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 27)
# define DDIV(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV)
+# define DDIV_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 30)
+# define DMOD_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 30)
# define DDIVU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU)
+# define DDIVU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 31)
+# define DMODU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 31)
# define SLLV(rd,rt,rs) rrr_t(rs,rt,rd,MIPS_SLLV)
# define SLL(rd,rt,sa) rrit(rt,rd,sa,MIPS_SLL)
# define DSLLV(rd,rt,rs) rrr_t(rs,rt,rd,MIPS_DSLLV)
# define ANDI(rt,rs,im) hrri(MIPS_ANDI,rs,rt,im)
# define OR(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_OR)
# define ORI(rt,rs,im) hrri(MIPS_ORI,rs,rt,im)
+# define NOR(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_NOR)
# define XOR(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_XOR)
# define XORI(rt,rs,im) hrri(MIPS_XORI,rs,rt,im)
# define LB(rt,of,rb) hrri(MIPS_LB,rb,rt,of)
# define LWU(rt,of,rb) hrri(MIPS_LWU,rb,rt,of)
# define LD(rt,of,rb) hrri(MIPS_LD,rb,rt,of)
# define LL(rt,of,rb) hrri(MIPS_LL,rb,rt,of)
+# define LL_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,54)
# define LLD(rt,of,rb) hrri(MIPS_LLD,rb,rt,of)
+# define LLD_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,55)
# define SB(rt,of,rb) hrri(MIPS_SB,rb,rt,of)
# define SH(rt,of,rb) hrri(MIPS_SH,rb,rt,of)
# define SW(rt,of,rb) hrri(MIPS_SW,rb,rt,of)
# define SD(rt,of,rb) hrri(MIPS_SD,rb,rt,of)
# define SC(rt,of,rb) hrri(MIPS_SC,rb,rt,of)
+# define SC_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,38)
# define SCD(rt,of,rb) hrri(MIPS_SCD,rb,rt,of)
+# define SCD_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,39)
# define WSBH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
# define SEB(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
# define SEH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
# define BGEZ(rs,im) hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im)
# define BGTZ(rs,im) hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im)
# define BNE(rs,rt,im) hrri(MIPS_BNE,rs,rt,im)
+# define BGEZAL(rs,im) hrri(MIPS_REGIMM,rs,MIPS_BGEZAL,im)
# define JALR(r0) hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR)
-# if 1 /* supports MIPS32 R6 */
-# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
-# else /* does not support MIPS32 R6 */
-# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
+# if 1 /* This should work for mips r6 or older */
+# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
+# else /* This should generate an illegal instruction in mips r6 */
+# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
# endif
+# define CLO_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x11)
+# define DCLO_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x13)
+# define CLZ_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x10)
+# define DCLZ_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x12)
+# define BITSWAP(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x20)
+# define DBITSWAP(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x24)
+# define CLO(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLO)
+# define DCLO(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLO)
+# define CLZ(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLZ)
+# define DCLZ(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLZ)
# define J(i0) hi(MIPS_J,i0)
# define JAL(i0) hi(MIPS_JAL,i0)
# define MOVN(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVN)
# define MOVZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
+# define SELEQZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,53)
+# define SELNEZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,55)
# define comr(r0,r1) xori(r0,r1,-1)
# define negr(r0,r1) subr(r0,_ZERO_REGNO,r1)
+# define bitswap(r0,r1) _bitswap(_jit, r0, r1);
+static void _bitswap(jit_state_t*,jit_int32_t,jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# if __WORDSIZE == 32
# define addr(rd,rs,rt) ADDU(rd,rs,rt)
# define addiu(r0,r1,i0) ADDIU(r0,r1,i0)
# define subr(rd,rs,rt) SUBU(rd,rs,rt)
# define mult(rs,rt) MULT(rs,rt)
+# define mul_r6(rd,rs,rt) MUL_R6(rd,rs,rt)
+# define muh_r6(rd,rs,rt) MUH_R6(rd,rs,rt)
# define multu(rs,rt) MULTU(rs,rt)
+# define mulu_r6(rd,rs,rt) MULU_R6(rd,rs,rt)
+# define muhu_r6(rd,rs,rt) MUHU_R6(rd,rs,rt)
# define div(rs,rt) DIV(rs,rt)
# define divu(rs,rt) DIVU(rs,rt)
+# define div_r6(rd,rs,rt) DIV_R6(rd,rs,rt)
+# define divu_r6(rd,rs,rt) DIVU_R6(rd,rs,rt)
+# define mod_r6(rd,rs,rt) MOD_R6(rd,rs,rt)
+# define modu_r6(rd,rs,rt) MODU_R6(rd,rs,rt)
# else
# define addr(rd,rs,rt) DADDU(rd,rs,rt)
# define addiu(r0,r1,i0) DADDIU(r0,r1,i0)
# define subr(rd,rs,rt) DSUBU(rd,rs,rt)
# define mult(rs,rt) DMULT(rs,rt)
+# define mul_r6(rd,rs,rt) DMUL_R6(rd,rs,rt)
+# define muh_r6(rd,rs,rt) DMUH_R6(rd,rs,rt)
# define multu(rs,rt) DMULTU(rs,rt)
+# define mulu_r6(rd,rs,rt) DMULU_R6(rd,rs,rt)
+# define muhu_r6(rd,rs,rt) DMUHU_R6(rd,rs,rt)
# define div(rs,rt) DDIV(rs,rt)
# define divu(rs,rt) DDIVU(rs,rt)
+# define div_r6(rd,rs,rt) DDIV_R6(rd,rs,rt)
+# define divu_r6(rd,rs,rt) DDIVU_R6(rd,rs,rt)
+# define mod_r6(rd,rs,rt) DMOD_R6(rd,rs,rt)
+# define modu_r6(rd,rs,rt) DMODU_R6(rd,rs,rt)
# endif
# define extr(rd,rt,lsb,nb) _extr(_jit,rd,rt,lsb,nb)
static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-# define movnr(r0,r1,r2) MOVN(r0, r1, r2)
-# define movzr(r0,r1,r2) MOVZ(r0, r1, r2)
+# define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2)
+static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+# define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2)
+static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0)
static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
jit_int32_t,jit_int32_t,jit_word_t);
static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
#define nei(r0,r1,i0) _nei(_jit,r0,r1,i0)
static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#define bltr(i0,r0,r1) _bltr(_jit,i0,r0,r1)
-static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bltr_u(i0,r0,r1) _bltr_u(_jit,i0,r0,r1)
-static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blti(i0,r0,i1) _blti(_jit,i0,r0,i1)
-static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blti_u(i0,r0,i1) _blti_u(_jit,i0,r0,i1)
-static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bler(i0,r0,r1) _bler(_jit,i0,r0,r1)
-static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bler_u(i0,r0,r1) _bler_u(_jit,i0,r0,r1)
-static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blei(i0,r0,i1) _blei(_jit,i0,r0,i1)
-static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blei_u(i0,r0,i1) _blei_u(_jit,i0,r0,i1)
-static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1) bger(i0,r1,r0)
+#define bltr_u(i0,r0,r1) bger_u(i0,r1,r0)
+#define blti(i0,r0,i1) _bgei(_jit,i0,r0,i1,0,1)
+#define blti_u(i0,r0,i1) _bgei(_jit,i0,r0,i1,1,1)
+#define bler(i0,r0,r1) _bgtr(_jit,i0,r1,r0,0,1)
+#define bler_u(i0,r0,r1) _bgtr(_jit,i0,r1,r0,1,1)
+#define blei(i0,r0,i1) _bgti(_jit,i0,r0,i1,0,1)
+#define blei_u(i0,r0,i1) _bgti(_jit,i0,r0,i1,1,1)
#define beqr(i0,r0,r1) _beqr(_jit,i0,r0,r1)
static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define beqi(i0,r0,i1) _beqi(_jit,i0,r0,i1)
static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bger(i0,r0,r1) _bger(_jit,i0,r0,r1)
-static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bger_u(i0,r0,r1) _bger_u(_jit,i0,r0,r1)
-static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgei(i0,r0,i1) _bgei(_jit,i0,r0,i1)
-static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgei_u(i0,r0,i1) _bgei_u(_jit,i0,r0,i1)
-static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgtr(i0,r0,r1) _bgtr(_jit,i0,r0,r1)
-static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgtr_u(i0,r0,r1) _bgtr_u(_jit,i0,r0,r1)
-static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgti(i0,r0,i1) _bgti(_jit,i0,r0,i1)
-static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgti_u(i0,r0,i1) _bgti_u(_jit,i0,r0,i1)
-static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1) _bger(_jit,i0,r0,r1,0)
+#define bger_u(i0,r0,r1) _bger(_jit,i0,r0,r1,1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+ jit_bool_t);
+#define bgei(i0,r0,i1) _bgei(_jit,i0,r0,i1,0,0)
+#define bgei_u(i0,r0,i1) _bgei(_jit,i0,r0,i1,1,0)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+ jit_bool_t,jit_bool_t);
+#define bgtr(i0,r0,r1) _bgtr(_jit,i0,r0,r1,0,0)
+#define bgtr_u(i0,r0,r1) _bgtr(_jit,i0,r0,r1,1,0)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+ jit_bool_t,jit_bool_t);
+#define bgti(i0,r0,i1) _bgti(_jit,i0,r0,i1,0,0)
+#define bgti_u(i0,r0,i1) _bgti(_jit,i0,r0,i1,1,0)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+ jit_bool_t,jit_bool_t);
#define bner(i0,r0,r1) _bner(_jit,i0,r0,r1)
static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define bnei(i0,r0,i1) _bnei(_jit,i0,r0,i1)
static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
# define jmpr(r0) _jmpr(_jit,r0)
static void _jmpr(jit_state_t*,jit_int32_t);
-# define jmpi(i0) _jmpi(_jit,i0)
-static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+# define jmpi(i0,patch) _jmpi(_jit,i0,patch)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t);
+# define jmpi_p(i0) _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
# define boaddr(i0,r0,r1) _boaddr(_jit,i0,r0,r1)
static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
# define boaddi(i0,r0,i1) _boaddi(_jit,i0,r0,i1)
static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
# define callr(r0) _callr(_jit,r0)
static void _callr(jit_state_t*,jit_int32_t);
-# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+# define calli(i0,i1) _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_bool_t);
# define calli_p(i0) _calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
# define prolog(node) _prolog(_jit,node)
static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
#define patch_at(jump,label) _patch_at(_jit,jump,label)
static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+/* definitions used by jit_get_reg_for_delay_slot() */
+#include "jit_mips-fpu.c"
#endif
#if CODE
+/*
+ * Queue `op' as the pending instruction.  If another instruction was
+ * already pending it is written to the code buffer first, so at most
+ * one instruction is ever held back.
+ */
+static void
+_instr(jit_state_t *_jit, jit_int32_t op)
+{
+    if (!_jitc->inst.pend)
+        /* Nothing held back yet, just start holding */
+        _jitc->inst.pend = 1;
+    else
+        /* Write out the previously held instruction */
+        ii(_jitc->inst.op);
+    _jitc->inst.op = op;
+}
+
+/*
+ * Write the pending instruction, if any, to the code buffer and mark
+ * the pending slot empty.  Does nothing when no instruction is pending.
+ */
+static void
+_flush(jit_state_t *_jit)
+{
+    if (!_jitc->inst.pend)
+        return;
+    ii(_jitc->inst.op);
+    _jitc->inst.pend = 0;
+}
+
+/*
+ * Take the pending instruction out of the queue without emitting it.
+ * Returns its encoding, or 0 (the encoding NOP() uses) when nothing
+ * was pending.
+ */
+static jit_int32_t
+_pending(jit_state_t *_jit)
+{
+    jit_int32_t op = 0;
+    if (_jitc->inst.pend) {
+        op = _jitc->inst.op;
+        _jitc->inst.pend = 0;
+    }
+    return (op);
+}
+
+/*
+ * Write the pending instruction immediately followed by `op', leaving
+ * the pending slot empty.  An instruction must be pending on entry.
+ * NOTE(review): callers presumably have a branch pending and pass the
+ * instruction for its delay slot as `op' -- confirm against callers.
+ */
+static void
+_delay(jit_state_t *_jit, jit_int32_t op)
+{
+    assert(_jitc->inst.pend);
+    _jitc->inst.pend = 0;
+    ii(_jitc->inst.op);
+    ii(op);
+}
+
+/*
+ * Get a temporary register of class `mask' for a sequence that wants to
+ * keep the pending instruction (_jitc->inst) available for out of order
+ * emission (delay slot filling).
+ *
+ * The pending instruction -- or, when nothing is pending, the last word
+ * written to the code buffer, or a nop for an empty buffer -- is decoded
+ * and up to three register numbers it references are collected in
+ * regs[] (-1 means "not collected").  Then:
+ *  - if an instruction is pending and reg0 or reg1 is one of those
+ *    registers, the pending instruction is flushed (it cannot be moved);
+ *  - a register is requested with jit_class_nospill; while an instruction
+ *    is still pending, candidates that collide with regs[] are skipped;
+ *  - if no register could be obtained the pending instruction is flushed
+ *    and, unless `mask' has jit_class_chk, the request is retried.
+ *
+ * For a gpr request reg0/reg1 equal to 0 are never considered a conflict.
+ * Aborts on an instruction encoding it does not know.
+ *
+ * Returns the register handle from jit_get_reg(); JIT_NOREG is only
+ * returned when `mask' includes jit_class_chk.
+ */
+static jit_int32_t
+_jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
+                            jit_int32_t reg0, jit_int32_t reg1)
+{
+    jit_instr_t i;
+    jit_int32_t reg, r0, r1, r2, regs[3];
+    /* If will emit a pending instruction */
+    if (_jitc->inst.pend)
+        i.op = _jitc->inst.op;
+    /* Else if at least one instruction emitted, check it */
+    else if (_jit->pc.uc > _jit->code.ptr)
+        i.op = _jit->pc.ui[-1];
+    /* Else, a nop */
+    else
+        i.op = 0;
+    regs[0] = regs[1] = regs[2] = -1;
+    switch (i.hc.b) {
+        case MIPS_SPECIAL: /* 00 */
+            switch (i.tc.b) {
+                case MIPS_SLLV: /* 04 */
+                case MIPS_SRLV: /* 06 */
+                case MIPS_SRAV: /* 07 */
+                case MIPS_DSLLV: /* 14 */
+                case MIPS_DSRLV: /* 16 */
+                case MIPS_DSRAV: /* 17 */
+                case MIPS_ADDU: /* 21 */
+                case MIPS_SUBU: /* 23 */
+                case MIPS_AND: /* 24 */
+                case MIPS_OR: /* 25 */
+                case MIPS_XOR: /* 26 */
+                case MIPS_NOR: /* 27 */
+                case MIPS_SLT: /* 2a */
+                case MIPS_SLTU: /* 2b */
+                case MIPS_DADDU: /* 2d */
+                case MIPS_DSUBU: /* 2f */
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = i.rd.b;
+                    }
+                    break;
+                /* MUL MUH */
+                case MIPS_MULT: /* 18 */
+                /* MULU MUHU */
+                case MIPS_MULTU: /* 19 */
+                /* DIV MOD */
+                case MIPS_DIV: /* 1a */
+                /* DIVU MODU */
+                case MIPS_DIVU: /* 1b */
+                /* DMUL DMUH */
+                case MIPS_DMULT: /* 1c */
+                /* DMULU DMUHU */
+                case MIPS_DMULTU: /* 1d */
+                /* DDIV DMOD */
+                case MIPS_DDIV: /* 1e */
+                /* DDIVU DMODU */
+                case MIPS_DDIVU: /* 1f */
+                    if (jit_mips6_p()) {
+                        /* Release 6 three operand forms */
+                        assert(i.ic.b == 2 || i.ic.b == 3);
+                        if (mask & jit_class_gpr) {
+                            regs[0] = i.rs.b;
+                            regs[1] = i.rt.b;
+                            regs[2] = i.rd.b;
+                        }
+                    }
+                    else {
+                        assert(i.rd.b == 0);
+                        if (mask & jit_class_gpr) {
+                            regs[0] = i.rs.b;
+                            regs[1] = i.rt.b;
+                            regs[2] = 0;
+                        }
+                    }
+                    break;
+                /* CLZ */
+                case MIPS_MFHI: /* 10 */
+                /* CLO */
+                case MIPS_MTHI: /* 11 */
+                /* DCLZ */
+                case MIPS_MFLO: /* 12 */
+                /* DCLO */
+                case MIPS_MTLO: /* 13 */
+                    if (mask & jit_class_gpr) {
+                        if (jit_mips6_p()) {
+                            /* Release 6 reuses these function codes for
+                             * CLZ/CLO/DCLZ/DCLO, encoded with the source
+                             * in rs and the result in rd: both must be
+                             * tracked */
+                            assert(i.ic.b == 1);
+                            regs[0] = i.rs.b;
+                            regs[1] = i.rd.b;
+                        }
+                        else {
+                            assert(!i.rs.b && !i.rt.b);
+                            regs[0] = i.rd.b;
+                            regs[1] = 0;
+                        }
+                        regs[2] = 0;
+                    }
+                    break;
+                case MIPS_JR: /* 08 */
+                    assert(!jit_mips6_p());
+                    /* fallthrough */
+                case MIPS_JALR: /* 09 */
+                    /* check for proper/known encoding */
+                    assert(!i.ic.b);
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = i.rd.b;
+                    }
+                    break;
+                case MIPS_SLL: /* 00 */
+                case MIPS_SRL: /* 02 */
+                case MIPS_SRA: /* 03 */
+                case MIPS_DSLL: /* 38 */
+                case MIPS_DSRL: /* 3a */
+                case MIPS_DSRA: /* 3b */
+                case MIPS_DSLL32: /* 3c */
+                case MIPS_DSRA32: /* 3f */
+                case MIPS_DSRL32: /* 3e */
+                    /* shift (or rotate if i.rs.b == 1) */
+                    assert(i.rs.b == 0 || i.rs.b == 1);
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rt.b;
+                        regs[1] = i.rd.b;
+                        regs[2] = 0;
+                    }
+                    break;
+                case MIPS_SYNC: /* 0f */
+                    assert(i.rs.b == 0 && i.rt.b == 0 && i.rd.b == 0);
+                    if (mask & jit_class_gpr)
+                        regs[0] = regs[1] = regs[2] = 0;
+                    break;
+                case MIPS_MOVZ: /* 0a */
+                case MIPS_MOVN: /* 0b */
+                    assert(!jit_mips6_p() && i.ic.b == 0);
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = i.rd.b;
+                    }
+                    break;
+                /* SELEQZ */
+                case 53: /* 35 */
+                /* SELNEZ */
+                case 55: /* 37 */
+                    assert(jit_mips6_p() && i.ic.b == 0);
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = i.rd.b;
+                    }
+                    break;
+                default:
+                    abort();
+            }
+            break;
+        case MIPS_REGIMM: /* 01 */
+            switch (i.rt.b) {
+                case MIPS_BLTZ: /* 00 */
+                case MIPS_BGEZ: /* 01 */
+                case MIPS_BGEZAL: /* 11 */
+                    break;
+                default:
+                    abort();
+            }
+            if (mask & jit_class_gpr) {
+                regs[0] = i.rs.b;
+                regs[1] = regs[2] = 0;
+            }
+            break;
+        case MIPS_J: /* 02 */
+        case MIPS_JAL: /* 03 */
+            if (mask & jit_class_gpr)
+                regs[0] = regs[1] = regs[2] = 0;
+            break;
+        case MIPS_LUI: /* 0f */
+            assert(i.rs.b == 0);
+            if (mask & jit_class_gpr) {
+                regs[0] = i.rt.b;
+                regs[1] = regs[2] = 0;
+            }
+            break;
+        case MIPS_SPECIAL2: /* 1c */
+            switch (i.tc.b) {
+                case MIPS_CLZ: /* 20 */
+                case MIPS_CLO: /* 21 */
+                case MIPS_DCLZ: /* 24 */
+                case MIPS_DCLO: /* 25 */
+                    assert(!jit_mips6_p() && i.rt.b == i.rd.b && i.ic.b == 0);
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rd.b;
+                        regs[2] = 0;
+                    }
+                    break;
+                case MIPS_MUL: /* 02 */
+                    assert(jit_mips2_p() && i.ic.b == 0);
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = i.rd.b;
+                    }
+                    break;
+                default:
+                    abort();
+            }
+            break;
+        case MIPS_SPECIAL3: /* 1f */
+            switch (i.tc.b) {
+                case MIPS_EXT: /* 00 */
+                case MIPS_DEXTM: /* 01 */
+                case MIPS_DEXTU: /* 02 */
+                case MIPS_DEXT: /* 03 */
+                case MIPS_INS: /* 04 */
+                case MIPS_DINSM: /* 05 */
+                case MIPS_DINSU: /* 06 */
+                case MIPS_DINS: /* 07 */
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = 0;
+                    }
+                    break;
+                /* BITSWAP */
+                case MIPS_BSHFL: /* 20 */
+                /* DBITSWAP */
+                case MIPS_DBSHFL: /* 24 */
+                    switch (i.ic.b) {
+                        case MIPS_WSBH: /* 02 */
+                        case MIPS_SEB: /* 10 */
+                        case MIPS_SEH: /* 18 */
+                            if (mask & jit_class_gpr) {
+                                regs[0] = i.rt.b;
+                                regs[1] = i.rd.b;
+                                regs[2] = 0;
+                            }
+                            break;
+                        /* BITSWAP DBITSWAP */
+                        case 0:
+                            /* The BITSWAP()/DBITSWAP() encoders leave
+                             * rs as zero and put the source in rt */
+                            assert(jit_mips6_p() && i.rs.b == 0);
+                            if (mask & jit_class_gpr) {
+                                regs[0] = i.rt.b;
+                                regs[1] = i.rd.b;
+                                regs[2] = 0;
+                            }
+                            break;
+                        default:
+                            abort();
+                    }
+                    break;
+                /* SC */
+                case 38: /* 26 */
+                /* SCD */
+                case 39: /* 27 */
+                /* LL */
+                case 54: /* 36 */
+                /* LLD */
+                case 55: /* 37 */
+                    assert(jit_mips6_p());
+                    if (mask & jit_class_gpr) {
+                        regs[0] = i.rs.b;
+                        regs[1] = i.rt.b;
+                        regs[2] = 0;
+                    }
+                    break;
+                default:
+                    abort();
+            }
+            break;
+        case MIPS_COP1: /* 11 */
+            switch (i.tc.b) {
+                case MIPS_ADD_fmt: /* 00 */
+                    switch (i.rs.b) {
+                        case MIPS_MF: /* 00 */
+                        case MIPS_DMF: /* 01 */
+                        case MIPS_MFH: /* 03 */
+                        case MIPS_MT: /* 04 */
+                        case MIPS_DMT: /* 05 */
+                        case MIPS_MTH: /* 07 */
+                            /* gpr <-> fpr moves: one register of each
+                             * class, report the one matching `mask' */
+                            assert(i.ic.b == 0);
+                            if (mask & jit_class_gpr) {
+                                regs[0] = i.rt.b;
+                                regs[1] = regs[2] = 0;
+                            }
+                            else
+                                regs[0] = i.rd.b;
+                            break;
+                        default:
+                            goto three_fprs;
+                    }
+                    break;
+                case MIPS_SUB_fmt: /* 01 */
+                case MIPS_MUL_fmt: /* 02 */
+                case MIPS_DIV_fmt: /* 03 */
+                three_fprs:
+                    /* 10 */
+                    assert(i.rs.b == MIPS_fmt_S ||
+                           /* 11 */
+                           i.rs.b == MIPS_fmt_D);
+                    if (mask & jit_class_gpr)
+                        regs[0] = regs[1] = regs[2] = 0;
+                    else {
+                        regs[0] = i.rt.b;
+                        regs[1] = i.rd.b;
+                        regs[2] = i.ic.b;
+                    }
+                    break;
+                case MIPS_SQRT_fmt: /* 04 */
+                case MIPS_ABS_fmt: /* 05 */
+                case MIPS_MOV_fmt: /* 06 */
+                case MIPS_NEG_fmt: /* 07 */
+                    assert((i.rs.b == MIPS_fmt_S || i.rs.b == MIPS_fmt_D) &&
+                           i.rt.b == 0);
+                    if (mask & jit_class_gpr)
+                        regs[0] = regs[1] = regs[2] = 0;
+                    else {
+                        regs[0] = i.rd.b;
+                        regs[1] = i.ic.b;
+                    }
+                    break;
+                case MIPS_CVT_fmt_S: /* 20 */
+                case MIPS_CVT_fmt_D: /* 21 */
+                case MIPS_CVT_fmt_W: /* 24 */
+                case MIPS_CVT_fmt_L: /* 25 */
+                    switch (i.rs.b) {
+                        case MIPS_fmt_S:/* 10 */
+                        case MIPS_fmt_D:/* 11 */
+                        case MIPS_fmt_W:/* 14 */
+                        case MIPS_fmt_L:/* 15 */
+                            break;
+                        default:
+                            abort();
+                    }
+                    assert(i.rt.b == 0);
+                    if (mask & jit_class_gpr)
+                        regs[0] = regs[1] = regs[2] = 0;
+                    else {
+                        regs[0] = i.rd.b;
+                        regs[1] = i.ic.b;
+                    }
+                    break;
+                case MIPS_cond_F: /* 30 */
+                case MIPS_cond_UN: /* 31 */
+                case MIPS_cond_EQ: /* 32 */
+                case MIPS_cond_UEQ: /* 33 */
+                case MIPS_cond_OLT: /* 34 */
+                case MIPS_cond_ULT: /* 35 */
+                case MIPS_cond_OLE: /* 36 */
+                case MIPS_cond_ULE: /* 37 */
+                case MIPS_cond_SF: /* 38 */
+                case MIPS_cond_NGLE: /* 39 */
+                case MIPS_cond_SEQ: /* 3a */
+                case MIPS_cond_NGL: /* 3b */
+                case MIPS_cond_LT: /* 3c */
+                case MIPS_cond_NGE: /* 3d */
+                case MIPS_cond_LE: /* 3e */
+                case MIPS_cond_UGT: /* 3f */
+                    assert(!jit_mips6_p() &&
+                           /* 10 */
+                           (i.fm.b == MIPS_fmt_S ||
+                            /* 11 */
+                            i.fm.b == MIPS_fmt_D));
+                    if (mask & jit_class_gpr)
+                        regs[0] = regs[1] = regs[2] = 0;
+                    else {
+                        regs[0] = i.ft.b;
+                        regs[1] = i.fs.b;
+                    }
+                    break;
+                default:
+                    switch (i.ic.b) {
+                        case MIPS_cmp_AF: /* 00 */
+                        case MIPS_cmp_UN: /* 01 */
+                        case MIPS_cmp_EQ: /* 02 */
+                        case MIPS_cmp_UEQ: /* 03 */
+                        case MIPS_cmp_LT: /* 04 */
+                        case MIPS_cmp_ULT: /* 05 */
+                        case MIPS_cmp_LE: /* 06 */
+                        case MIPS_cmp_ULE: /* 07 */
+                        case MIPS_cmp_SAF: /* 08 */
+                        case MIPS_cmp_SUN: /* 09 */
+                        case MIPS_cmp_SEQ: /* 0a */
+                        case MIPS_cmp_SUEQ:/* 0b */
+                        case MIPS_cmp_SLT: /* 0c */
+                        case MIPS_cmp_SULT:/* 0d */
+                        case MIPS_cmp_SLE: /* 0e */
+                        case MIPS_cmp_SULE:/* 0f */
+                            assert(jit_mips6_p() &&
+                                   /* 14 */
+                                   (i.rs.b == MIPS_condn_S ||
+                                    /* 15 */
+                                    i.rs.b == MIPS_condn_D));
+                            if (mask & jit_class_gpr)
+                                regs[0] = regs[1] = regs[2] = 0;
+                            else {
+                                regs[0] = i.ft.b;
+                                regs[1] = i.fs.b;
+                                regs[2] = i.fd.b;
+                            }
+                            goto done;
+                        default:
+                            break;
+                    }
+                    /* NOTE(review): the branch kind is matched on the rt
+                     * field and BCF/BCT on the rs field; confirm this is
+                     * the field order the BC1* encoders in
+                     * jit_mips-fpu.c actually use */
+                    switch (i.rt.b) {
+                        case MIPS_BC: /* 08 */
+                            assert(!jit_mips6_p() &&
+                                   /* 00 */
+                                   (i.rs.b == MIPS_BCF ||
+                                    /* 01 */
+                                    i.rs.b == MIPS_BCT));
+                            if (mask & jit_class_gpr)
+                                regs[0] = regs[1] = regs[2] = 0;
+                            else {
+                                regs[0] = i.rt.b;
+                                regs[1] = i.rd.b;
+                            }
+                            break;
+                        case MIPS_BC1EQZ:/* 09 */
+                        case MIPS_BC1NEZ:/* 0d */
+                            assert(jit_mips6_p());
+                            if (mask & jit_class_gpr)
+                                regs[0] = regs[1] = regs[2] = 0;
+                            else
+                                regs[0] = i.rt.b;
+                            break;
+                        default:
+                            abort();
+                    }
+                    break;
+            }
+            break;
+        case MIPS_ADDIU: /* 09 */
+        case MIPS_SLTI: /* 0a */
+        case MIPS_SLTIU: /* 0b */
+        case MIPS_ANDI: /* 0c */
+        case MIPS_ORI: /* 0d */
+        case MIPS_XORI: /* 0e */
+        case MIPS_DADDIU: /* 18 */
+        case MIPS_LB: /* 20 */
+        case MIPS_LH: /* 21 */
+        case MIPS_LW: /* 23 */
+        case MIPS_LBU: /* 24 */
+        case MIPS_LHU: /* 25 */
+        case MIPS_LWU: /* 27 */
+        case MIPS_SB: /* 28 */
+        case MIPS_SH: /* 29 */
+        case MIPS_SW: /* 2b */
+        case MIPS_LD: /* 37 */
+        case MIPS_SD: /* 3f */
+            if (mask & jit_class_gpr) {
+                regs[0] = i.rs.b;
+                regs[1] = i.rt.b;
+                regs[2] = 0;
+            }
+            break;
+        case MIPS_LL: /* 30 */
+        case MIPS_LLD: /* 34 */
+        case MIPS_SC: /* 38 */
+        case MIPS_SCD: /* 3c */
+            assert(!jit_mips6_p() && i.ic.b == 0);
+            if (mask & jit_class_gpr) {
+                regs[0] = i.rs.b;
+                regs[1] = i.rt.b;
+                regs[2] = 0;
+            }
+            break;
+        case MIPS_BLEZ: /* 06 */
+        case MIPS_BGTZ: /* 07 */
+            assert(i.rt.b == 0);
+            if (mask & jit_class_gpr) {
+                regs[0] = i.rs.b;
+                regs[1] = regs[2] = 0;
+            }
+            break;
+        case MIPS_BEQ: /* 04 */
+        case MIPS_BNE: /* 05 */
+            /* NOTE(review): only branches comparing against register 0
+             * are accepted here; confirm a two register BEQ/BNE can
+             * never be the instruction being decoded */
+            assert(i.rt.b == 0);
+            /* fallthrough */
+        case MIPS_LWC1: /* 31 */
+        case MIPS_LDC1: /* 35 */
+        case MIPS_SWC1: /* 39 */
+        case MIPS_SDC1: /* 3d */
+            if (mask & jit_class_gpr) {
+                regs[0] = i.rs.b;
+                regs[1] = i.rt.b;
+                regs[2] = 0;
+            }
+            else
+                regs[0] = i.rt.b;
+            break;
+        default:
+            abort();
+    }
+done:
+    /* If cannot move instruction to delay slot; for a gpr request a
+     * reg0/reg1 of zero is not checked */
+    if (_jitc->inst.pend &&
+        ((((mask & jit_class_fpr) || reg0) &&
+          (reg0 == regs[0] || reg0 == regs[1] || reg0 == regs[2])) ||
+         (((mask & jit_class_fpr) || reg1) &&
+          (reg1 == regs[0] || reg1 == regs[1] || reg1 == regs[2])))) {
+        flush();
+    }
+    /* Get a temporary register */
+retry:
+    reg = jit_get_reg(mask|jit_class_nospill);
+    /* Make sure will not use a register in use by delay slot; as regs[]
+     * has three entries, up to three colliding candidates are held
+     * while asking for another one, then released */
+    if (_jitc->inst.pend) {
+        if (rn(reg) == regs[0] ||
+            rn(reg) == regs[1] || rn(reg) == regs[2]) {
+            r0 = reg;
+            reg = jit_get_reg(mask|jit_class_nospill);
+            if (rn(reg) == regs[0] ||
+                rn(reg) == regs[1] || rn(reg) == regs[2]) {
+                r1 = reg;
+                reg = jit_get_reg(mask|jit_class_nospill);
+                if (rn(reg) == regs[0] ||
+                    rn(reg) == regs[1] || rn(reg) == regs[2]) {
+                    r2 = reg;
+                    reg = jit_get_reg(mask|jit_class_nospill);
+                    jit_unget_reg(r2);
+                }
+                jit_unget_reg(r1);
+            }
+            jit_unget_reg(r0);
+        }
+    }
+    if (reg == JIT_NOREG) {
+        /* Cannot get a register to optimize delay slot */
+        flush();
+        /* Must find a free register */
+        if (!(mask & jit_class_chk))
+            goto retry;
+    }
+    assert(reg != JIT_NOREG || (mask & jit_class_chk));
+    return (reg);
+}
+
static void
_hrrrit(jit_state_t *_jit,jit_int32_t hc,
jit_int32_t rs, jit_int32_t rt, jit_int32_t rd,
i.rt.b = rt;
i.rs.b = rs;
i.hc.b = hc;
- ii(i.op);
+ instr(i.op);
}
static void
i.rt.b = rt;
i.rs.b = rs;
i.hc.b = hc;
- ii(i.op);
+ instr(i.op);
+}
+
+/*
+ * Build an instruction word from a major opcode (hc), two register
+ * fields (rs, rt), a 9 bit immediate field (i9) and a function code
+ * (tc), and queue it with instr().  All other bits are zero.
+ */
+static void
+_hrri9(jit_state_t *_jit, jit_int32_t hc,
+       jit_int32_t rs, jit_int32_t rt, jit_int32_t i9, jit_int32_t tc)
+{
+    jit_instr_t insn;
+
+    /* Start from an all zero word, then fill every field */
+    insn.op = 0;
+    insn.hc.b = hc;
+    insn.rs.b = rs;
+    insn.rt.b = rt;
+    insn.i9.b = i9;
+    insn.tc.b = tc;
+    instr(insn.op);
}
static void
jit_instr_t i;
i.ii.b = im;
i.hc.b = hc;
- ii(i.op);
+ instr(i.op);
}
static void
DINS(r0, r1, pos, size);
}
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2
+unsigned int mask = ~0;
+while ((s >>= 1) > 0)
+{
+ mask ^= (mask << s);
+ v = ((v >> s) & mask) | ((v << s) & ~mask);
+}
+*/
+/*
+ * Emit a run time loop that stores in register `v' the value of
+ * register `r1' with its bits in reverse order, following the
+ * "reverse in parallel" bit hack: the swap distance s starts at
+ * __WORDSIZE and is halved on every iteration until it reaches zero,
+ * while the mask selecting the bits to swap is refined with it.
+ *
+ * Four temporary registers (s, mask, t0, t1) are taken for the
+ * sequence and released before returning.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1)
+{
+    /* Register handles returned by jit_get_reg() */
+    jit_int32_t s, mask, t0, t1;
+    /* Code addresses: loop head, and the exit branch to be patched */
+    jit_word_t loop, done;
+    movr(v, r1);
+    s = jit_get_reg(jit_class_gpr);
+    movi(rn(s), __WORDSIZE);            /* s = sizeof(v) * CHAR_BIT; */
+    mask = jit_get_reg(jit_class_gpr);
+    movi(rn(mask), ~0L);                /* mask = ~0; */
+    /* Nothing may be pending when recording the loop address */
+    flush();
+    loop = _jit->pc.w;                  /* while ((s >>= 1) > 0) */
+    rshi(rn(s), rn(s), 1);              /* (s >>= 1) */
+    done = blei(_jit->pc.w, rn(s), 0);  /* no loop if s <= 0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    lshr(rn(t0), rn(mask), rn(s));      /* t0 = (mask << s) */
+    xorr(rn(mask), rn(mask), rn(t0));   /* mask ^= t0 */
+    rshr(rn(t0), v, rn(s));             /* t0 = v >> s */
+    andr(rn(t0), rn(t0), rn(mask));     /* t0 = t0 & mask */
+    t1 = jit_get_reg(jit_class_gpr);
+    lshr(rn(t1), v, rn(s));             /* t1 = v << s */
+    /* v was already consumed by both shifts: reuse it to hold ~mask */
+    comr(v, rn(mask));                  /* v = ~mask */
+    andr(rn(t1), v, rn(t1));            /* t1 = t1 & v */
+    orr(v, rn(t0), rn(t1));             /* v = t0 | t1 */
+    jmpi(loop, 0);
+    /* Write out whatever is pending so the exit branch is patched to
+     * the first address after the loop */
+    flush();
+    patch_at(done, _jit->pc.w);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    jit_unget_reg(mask);
+    jit_unget_reg(s);
+}
+
+/*
+ * Count leading ones of r1 into r0, using the release 6 encoding when
+ * jit_mips6_p() and the 32 or 64 bit instruction according to
+ * __WORDSIZE.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips6_p()) {
+#if __WORDSIZE == 32
+        CLO_R6(r0, r1);
+#else
+        DCLO_R6(r0, r1);
+#endif
+    }
+    else {
+#if __WORDSIZE == 32
+        CLO(r0, r1);
+#else
+        DCLO(r0, r1);
+#endif
+    }
+}
+
+/*
+ * Count leading zeros of r1 into r0, using the release 6 encoding when
+ * jit_mips6_p() and the 32 or 64 bit instruction according to
+ * __WORDSIZE.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips6_p()) {
+#if __WORDSIZE == 32
+        CLZ_R6(r0, r1);
+#else
+        DCLZ_R6(r0, r1);
+#endif
+    }
+    else {
+#if __WORDSIZE == 32
+        CLZ(r0, r1);
+#else
+        DCLZ(r0, r1);
+#endif
+    }
+}
+
+/*
+ * Count trailing ones of r1 into r0: the bit order is reversed and the
+ * leading ones of the result are counted.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_mips6_p()) {
+        /* No bit reversing instruction: use the generic loop */
+        bitswap(r0, r1);
+        clor(r0, r0);
+        return;
+    }
+    /* Release 6: BITSWAP followed by a byte swap, then count */
+#if __WORDSIZE == 32
+    BITSWAP(r0, r1);
+    bswapr_ui(r0, r0);
+    CLO_R6(r0, r0);
+#else
+    DBITSWAP(r0, r1);
+    bswapr_ul(r0, r0);
+    DCLO_R6(r0, r0);
+#endif
+}
+
+/*
+ * Count trailing zeros of r1 into r0: the bit order is reversed and
+ * the leading zeros of the result are counted.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_mips6_p()) {
+        /* No bit reversing instruction: use the generic loop */
+        bitswap(r0, r1);
+        clzr(r0, r0);
+        return;
+    }
+    /* Release 6: BITSWAP followed by a byte swap, then count */
+#if __WORDSIZE == 32
+    BITSWAP(r0, r1);
+    bswapr_ui(r0, r0);
+    CLZ_R6(r0, r0);
+#else
+    DBITSWAP(r0, r1);
+    bswapr_ul(r0, r0);
+    DCLZ_R6(r0, r0);
+#endif
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
static void
_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- if (jit_mips2_p() && __WORDSIZE == 32)
- MUL(r0, r1, r2);
+ if (jit_mips6_p())
+ mul_r6(r0, r1, r2);
else {
- multu(r1, r2);
- MFLO(r0);
+ if (jit_mips2_p() && __WORDSIZE == 32)
+ MUL(r0, r1, r2);
+ else {
+ multu(r1, r2);
+ MFLO(r0);
+ }
}
}
_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
- if (sign)
- mult(r2, r3);
- else
- multu(r2, r3);
- MFLO(r0);
- MFHI(r1);
+ jit_int32_t t0;
+ if (jit_mips6_p()) {
+ if (r0 == r2 || r0 == r3) {
+ t0 = jit_get_reg(jit_class_gpr);
+ if (sign)
+ mul_r6(rn(t0), r2, r3);
+ else
+ mulu_r6(rn(t0), r2, r3);
+ }
+ else {
+ if (sign)
+ mul_r6(r0, r2, r3);
+ else
+ mulu_r6(r0, r2, r3);
+ }
+ if (sign)
+ muh_r6(r1, r2, r3);
+ else
+ muhu_r6(r1, r2, r3);
+ if (r0 == r2 || r0 == r3) {
+ movr(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else {
+ if (sign)
+ mult(r2, r3);
+ else
+ multu(r2, r3);
+ MFLO(r0);
+ MFHI(r1);
+ }
}
static void
static void
_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- div(r1, r2);
- MFLO(r0);
+ /* Release 6 uses the three-operand div_r6 writing r0 directly;
+  * older targets keep div followed by MFLO. */
+ if (jit_mips6_p())
+ div_r6(r0, r1, r2);
+ else {
+ div(r1, r2);
+ MFLO(r0);
+ }
}
static void
static void
_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- divu(r1, r2);
- MFLO(r0);
+ /* Release 6 uses the three-operand divu_r6 writing r0 directly;
+  * older targets keep divu followed by MFLO. */
+ if (jit_mips6_p())
+ divu_r6(r0, r1, r2);
+ else {
+ divu(r1, r2);
+ MFLO(r0);
+ }
}
static void
_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
{
- if (sign)
- div(r2, r3);
- else
- divu(r2, r3);
- MFLO(r0);
- MFHI(r1);
+ /* Divide r2 by r3, leaving one result in r0 and the other in r1
+  * (MFLO/MFHI on the legacy path, div/mod pairs on release 6);
+  * `sign` selects the signed or unsigned instructions. */
+ jit_int32_t t0;
+ if (jit_mips6_p()) {
+ /* Both release 6 instructions read r2 and r3, so if r0 aliases an
+  * input the first result is staged in a scratch register (t0) and
+  * moved to r0 at the end; t0 == _NOREG means no staging is needed. */
+ if (r0 == r2 || r0 == r3)
+ t0 = jit_get_reg(jit_class_gpr);
+ else
+ t0 = _NOREG;
+ if (sign) {
+ if (t0 == _NOREG)
+ div_r6(r0, r2, r3);
+ else
+ div_r6(rn(t0), r2, r3);
+ mod_r6(r1, r2, r3);
+ }
+ else {
+ if (t0 == _NOREG)
+ divu_r6(r0, r2, r3);
+ else
+ divu_r6(rn(t0), r2, r3);
+ modu_r6(r1, r2, r3);
+ }
+ if (t0 != _NOREG) {
+ movr(r0, rn(t0));
+ jit_unget_reg(t0);
+ }
+ }
+ else {
+ if (sign)
+ div(r2, r3);
+ else
+ divu(r2, r3);
+ MFLO(r0);
+ MFHI(r1);
+ }
}
static void
static void
_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- div(r1, r2);
- MFHI(r0);
+ /* Release 6 uses the three-operand mod_r6 writing r0 directly;
+  * older targets keep div followed by MFHI. */
+ if (jit_mips6_p())
+ mod_r6(r0, r1, r2);
+ else {
+ div(r1, r2);
+ MFHI(r0);
+ }
}
static void
static void
_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- divu(r1, r2);
- MFHI(r0);
+ /* Release 6 uses the three-operand modu_r6 writing r0 directly;
+  * older targets keep divu followed by MFHI. */
+ if (jit_mips6_p())
+ modu_r6(r0, r1, r2);
+ else {
+ divu(r1, r2);
+ MFHI(r0);
+ }
}
static void
_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_word_t w;
-
+ flush();
w = _jit->pc.w;
# if __WORDSIZE == 32
LUI(r0, i0 >> 16);
return (w);
}
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp;
+    /* Targets before release 6 have a native MOVN. */
+    if (!jit_mips6_p()) {
+        MOVN(r0, r1, r2);
+        return;
+    }
+    /* Release 6: build the conditional move from a SELNEZ/SELEQZ pair
+     * keyed on r2, merged with OR through a scratch register. */
+    tmp = jit_get_reg(jit_class_gpr);
+    SELNEZ(rn(tmp), r1, r2);
+    SELEQZ(r0, r0, r2);
+    OR(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t tmp;
+    /* Targets before release 6 have a native MOVZ. */
+    if (!jit_mips6_p()) {
+        MOVZ(r0, r1, r2);
+        return;
+    }
+    /* Release 6: build the conditional move from a SELEQZ/SELNEZ pair
+     * keyed on r2, merged with OR through a scratch register. */
+    tmp = jit_get_reg(jit_class_gpr);
+    SELEQZ(rn(tmp), r1, r2);
+    SELNEZ(r0, r0, r2);
+    OR(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
static void
_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
}
SYNC();
/* retry: */
+ flush();
retry = _jit->pc.w;
# if __WORDSIZE == 32
- LL(r0, 0, r1);
+ if (jit_mips6_p()) LL_R6(r0, 0, r1);
+ else LL(r0, 0, r1);
# else
- LLD(r0, 0, r1);
+ if (jit_mips6_p()) LLD_R6(r0, 0, r1);
+ else LLD(r0, 0, r1);
# endif
+ flush();
jump0 = _jit->pc.w;
BNE(r0, r2, 1); /* bne done r0 r2 */
movi(r0, 0); /* set to 0 in delay slot */
+ flush();
movr(r0, r3); /* after jump and delay slot */
/* store new value */
# if __WORDSIZE == 32
- SC(r0, 0, r1);
+ if (jit_mips6_p()) SC_R6(r0, 0, r1);
+ else SC(r0, 0, r1);
# else
- SCD(r0, 0, r1);
+ if (jit_mips6_p()) SCD_R6(r0, 0, r1);
+ else SCD(r0, 0, r1);
# endif
+ flush();
jump1 = _jit->pc.w;
BEQ(r0, _ZERO_REGNO, 0); /* beqi retry r0 0 */
movi(r0, 1); /* set to 1 in delay slot */
+ flush();
SYNC();
/* done: */
+ flush();
done = _jit->pc.w;
patch_at(jump0, done);
patch_at(jump1, retry);
static void
_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_c(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_c, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_c(r0, r0);
}
static void
_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LB; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LB(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_c(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_c(r0, r0);
}
}
static void
_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_uc(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_uc, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_uc(r0, r0);
}
static void
_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LBU; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LBU(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_uc(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_uc(r0, r0);
}
}
static void
_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_s(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_s, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_s(r0, r0);
}
static void
_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LH; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LH(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_s(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_s(r0, r0);
}
}
static void
_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_us(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_us, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_us(r0, r0);
}
static void
_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LHU; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LHU(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_us(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_us(r0, r0);
}
}
static void
_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_i(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_i, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_i(r0, r0);
}
static void
_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LW; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LW(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_i(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_i(r0, r0);
}
}
static void
_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_ui(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_ui, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_ui(r0, r0);
}
static void
_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LWU; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LWU(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_ui(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_ui(r0, r0);
}
}
static void
_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
{
- jit_int32_t reg;
- reg = jit_get_reg(jit_class_gpr);
- addr(rn(reg), r1, r2);
- ldr_l(r0, rn(reg));
- jit_unget_reg(reg);
+ /* Form the address r1 + r2 in the destination itself and load through
+  * it with ldr_l, so no scratch register is allocated. */
+ addr(r0, r1, r2);
+ ldr_l(r0, r0);
}
static void
_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
- jit_int32_t reg;
+ /* Offsets accepted by can_sign_extend_short_p() are encoded directly
+  * in LD; otherwise r1 + i0 is formed in r0 itself and loaded through
+  * it, so no scratch register is allocated. */
if (can_sign_extend_short_p(i0))
LD(r0, i0, r1);
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_l(r0, rn(reg));
- jit_unget_reg(reg);
+ addi(r0, r1, i0);
+ ldr_l(r0, r0);
}
}
#endif
_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
+ /* r0 = r1 - r2 is zero only when the operands are equal, and
+  * SLTIU r0, r0, 1 (unsigned "r0 < 1") turns that into the 0/1 result
+  * in one instruction instead of the former SLTU + XORI pair. */
subr(r0, r1, r2);
- SLTU(r0, _ZERO_REGNO, r0);
- XORI(r0, r0, 1);
+ SLTIU(r0, r0, 1);
}
static void
{
if (i0) {
subi(r0, r1, i0);
- SLTU(r0, _ZERO_REGNO, r0);
+ SLTIU(r0, r0, 1);
+ } else {
+ SLTIU(r0, r1, 1);
}
- else
- SLTU(r0, _ZERO_REGNO, r1);
- XORI(r0, r0, 1);
}
static void
}
static jit_word_t
-_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr);
- SLT(rn(reg), r0, r1);
- w = _jit->pc.w;
- BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLTU(rn(reg), r0, r1);
- w = _jit->pc.w;
- BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
- jit_word_t w;
- jit_word_t d;
- jit_int32_t reg;
- jit_bool_t zero_p;
-
- if (!(zero_p = i1 == 0))
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- if (can_sign_extend_short_p(i1)) {
- if (!zero_p)
- SLTI(rn(reg), r0, i1);
- w = _jit->pc.w;
- d = ((i0 - w) >> 2) - 1;
- if (!zero_p)
- BNE(rn(reg), _ZERO_REGNO, d);
- else
- BLTZ(r0, d);
- NOP(1);
- }
- else {
- movi(rn(reg), i1);
- w = bltr(i0, r0, rn(reg));
- }
- if (!zero_p)
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- if (can_sign_extend_short_p(i1)) {
- SLTIU(rn(reg), r0, i1);
- w = _jit->pc.w;
- BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- movi(rn(reg), i1);
- w = bltr_u(i0, r0, rn(reg));
- }
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLT(rn(reg), r1, r0);
- w = _jit->pc.w;
- BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLTU(rn(reg), r1, r0);
- w = _jit->pc.w;
- BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- if (i1 == 0) {
- w = _jit->pc.w;
- BLEZ(r0, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- movi(rn(reg), i1);
- w = bler(i0, r0, rn(reg));
- jit_unget_reg(reg);
- }
-
- return (w);
-}
-
-static jit_word_t
-_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- if (i1 == 0) {
- w = _jit->pc.w;
- BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- movi(rn(reg), i1);
- w = bler_u(i0, r0, rn(reg));
- jit_unget_reg(reg);
- }
-
- return (w);
-}
-
-static jit_word_t
-_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
- jit_word_t w;
-
+ jit_int32_t op, reg;
+ /* Branch to i0 when r0 == r1; returns the address of the branch
+  * instruction.  No scratch register is needed by the branch itself:
+  * the request (jit_class_chk, so it may yield JIT_NOREG) is made with
+  * r0/r1 so that an instruction unsafe for them is not moved into the
+  * delay slot. */
+ reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1);
+ op = pending();
+ /* implicit flush(): read pc only after pending() */
w = _jit->pc.w;
BEQ(r0, r1, ((i0 - w) >> 2) - 1);
- NOP(1);
-
+ /* fill the delay slot with the pending instruction taken above */
+ delay(op);
+ if (reg != JIT_NOREG)
+ jit_unget_reg(reg);
return (w);
}
_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
jit_word_t w;
- jit_int32_t reg;
-
- if (i1 == 0) {
- w = _jit->pc.w;
- BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
+ /* Branch to i0 when r0 == i1; returns the address of the branch
+  * instruction.  A zero immediate compares against _ZERO_REGNO via
+  * beqr; any other value is first loaded into a scratch register. */
+ jit_int32_t op, reg;
+ if (i1 == 0)
+ w = beqr(i0, r0, _ZERO_REGNO);
else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+ reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+ op = pending();
movi(rn(reg), i1);
- w = beqr(i0, r0, rn(reg));
+ /* emit the movi before sampling pc for the branch displacement */
+ flush();
+ w = _jit->pc.w;
+ BEQ(r0, rn(reg), ((i0 - w) >> 2) - 1);
+ delay(op);
jit_unget_reg(reg);
}
-
return (w);
}
static jit_word_t
-_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+ jit_bool_t sltu)
{
jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLT(rn(reg), r0, r1);
- w = _jit->pc.w;
- BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLTU(rn(reg), r0, r1);
+ /* Branch to i0 when !(r0 < r1); returns the address of the branch.
+  * `sltu` selects the unsigned compare (SLTU) over the signed one
+  * (SLT), merging the former separate _bger_u emitter.  The pending
+  * instruction captured before the compare fills the delay slot. */
+ jit_int32_t op, reg;
+ reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+ op = pending();
+ if (sltu)
+ SLTU(rn(reg), r0, r1);
+ else
+ SLT(rn(reg), r0, r1);
+ flush();
w = _jit->pc.w;
BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
+ delay(op);
jit_unget_reg(reg);
-
return (w);
}
static jit_word_t
-_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+ jit_bool_t sltiu, jit_bool_t bne)
{
jit_word_t w;
jit_word_t d;
- jit_int32_t reg;
jit_bool_t zero_p;
-
- if (!(zero_p = i1 == 0))
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+ /* Shared emitter for compare-with-immediate branches built on
+  * "r0 < i1": `sltiu` selects the unsigned compare, and `bne` branches
+  * when the compare is true (BNE/BLTZ) instead of false (BEQ/BGEZ).
+  * Returns the address of the branch instruction. */
+ jit_int32_t op, t0, mask;
+ /* signed compare against zero needs no compare instruction */
+ zero_p = !sltiu && i1 == 0;
+ /* Even if zero_p allocate one as a mean to avoid incorrect delay slot */
+ mask = jit_class_gpr;
+ if (zero_p)
+ mask |= jit_class_chk;
+ t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO);
if (can_sign_extend_short_p(i1)) {
- if (!zero_p)
- SLTI(rn(reg), r0, i1);
+ op = pending();
+ if (!zero_p) {
+ if (sltiu)
+ SLTIU(rn(t0), r0, i1);
+ else
+ SLTI(rn(t0), r0, i1);
+ }
+ flush();
w = _jit->pc.w;
d = ((i0 - w) >> 2) - 1;
- if (!zero_p)
- BEQ(rn(reg), _ZERO_REGNO, d);
- else
- BGEZ(r0, d);
- NOP(1);
+ if (bne) {
+ if (!zero_p)
+ BNE(rn(t0), _ZERO_REGNO, d);
+ else
+ BLTZ(r0, d);
+ }
+ else {
+ if (!zero_p)
+ BEQ(rn(t0), _ZERO_REGNO, d);
+ else
+ BGEZ(r0, d);
+ }
}
else {
- movi(rn(reg), i1);
- w = bger(i0, r0, rn(reg));
- }
- if (!zero_p)
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- if (can_sign_extend_short_p(i1)) {
- SLTIU(rn(reg), r0, i1);
+ /* immediate does not fit: materialize it in t0 and compare
+  * register to register (zero_p is never set on this path since a
+  * zero immediate always takes the branch above) */
+ op = pending();
+ movi(rn(t0), i1);
+ if (sltiu)
+ SLTU(rn(t0), r0, rn(t0));
+ else
+ SLT(rn(t0), r0, rn(t0));
+ flush();
w = _jit->pc.w;
- BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- movi(rn(reg), i1);
- w = bger_u(i0, r0, rn(reg));
+ if (bne)
+ BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+ else
+ BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
}
- jit_unget_reg(reg);
-
- return (w);
-}
-
-static jit_word_t
-_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLT(rn(reg), r1, r0);
- w = _jit->pc.w;
- BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- jit_unget_reg(reg);
-
+ delay(op);
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
return (w);
}
static jit_word_t
-_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+ jit_bool_t sltu, jit_bool_t inv)
{
jit_word_t w;
- jit_int32_t reg;
-
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- SLTU(rn(reg), r1, r0);
+ /* Shared emitter built on "r1 < r0": `sltu` selects the unsigned
+  * compare, and `inv` branches when the compare is false (BEQ) rather
+  * than true (BNE).  Returns the address of the branch instruction. */
+ jit_int32_t op, reg;
+ reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+ op = pending();
+ if (sltu)
+ SLTU(rn(reg), r1, r0);
+ else
+ SLT(rn(reg), r1, r0);
+ flush();
w = _jit->pc.w;
- BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
+ if (inv)
+ BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+ else
+ BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+ delay(op);
jit_unget_reg(reg);
-
return (w);
}
static jit_word_t
-_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+ jit_bool_t sltiu, jit_bool_t inv)
{
jit_word_t w;
- jit_int32_t reg;
-
+ /* Shared emitter built on "i1 < r0": `sltiu` selects the unsigned
+  * compare, and `inv` branches when the compare is false rather than
+  * true.  Returns the address of the branch instruction. */
+ jit_int32_t op, t0, mask;
+ mask = jit_class_gpr;
+ /* Only the zero-immediate path can do without a scratch register, so
+  * only then may the request be optional (jit_class_chk).  This must
+  * test the immediate i1, not the branch target i0: the non-zero path
+  * below uses rn(t0) unconditionally. */
+ if (i1 == 0)
+ mask |= jit_class_chk;
+ /* Allocate even if i1 == 0 as a way to avoid incorrect delay slot */
+ t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO);
if (i1 == 0) {
+ op = pending();
+ /* implicit flush() */
w = _jit->pc.w;
- BGTZ(r0, ((i0 - w) >> 2) - 1);
- NOP(1);
+ if (inv) {
+ if (sltiu)
+ BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+ else
+ BLEZ(r0, ((i0 - w) >> 2) - 1);
+ }
+ else {
+ if (sltiu)
+ BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+ else
+ BGTZ(r0, ((i0 - w) >> 2) - 1);
+ }
}
else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- movi(rn(reg), i1);
- w = bgtr(i0, r0, rn(reg));
- jit_unget_reg(reg);
- }
-
- return (w);
-}
-
-static jit_word_t
-_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
- jit_word_t w;
- jit_int32_t reg;
-
- if (i1 == 0) {
+ op = pending();
+ movi(rn(t0), i1);
+ if (sltiu)
+ SLTU(rn(t0), rn(t0), r0);
+ else
+ SLT(rn(t0), rn(t0), r0);
+ flush();
w = _jit->pc.w;
- BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
- else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- movi(rn(reg), i1);
- w = bgtr_u(i0, r0, rn(reg));
- jit_unget_reg(reg);
+ if (inv)
+ BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+ else
+ BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
}
-
+ delay(op);
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
return (w);
}
_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t w;
-
+ /* Branch to i0 when r0 != r1; returns the address of the branch
+  * instruction.  No scratch register is needed by the branch itself:
+  * the request (jit_class_chk, so it may yield JIT_NOREG) is made with
+  * r0/r1 so that an instruction unsafe for them is not moved into the
+  * delay slot. */
+ jit_int32_t op, reg;
+ reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1);
+ op = pending();
+ /* implicit flush(): read pc only after pending() */
w = _jit->pc.w;
BNE(r0, r1, ((i0 - w) >> 2) - 1);
- NOP(1);
-
+ /* fill the delay slot with the pending instruction taken above */
+ delay(op);
+ if (reg != JIT_NOREG)
+ jit_unget_reg(reg);
return (w);
}
_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
jit_word_t w;
- jit_int32_t reg;
-
- if (i1 == 0) {
- w = _jit->pc.w;
- BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
- }
+ /* Branch to i0 when r0 != i1; returns the address of the branch
+  * instruction.  A zero immediate compares against _ZERO_REGNO via
+  * bner; any other value is first loaded into a scratch register. */
+ jit_int32_t op, reg;
+ if (i1 == 0)
+ w = bner(i0, r0, _ZERO_REGNO);
else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+ reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+ op = pending();
movi(rn(reg), i1);
- w = bner(i0, r0, rn(reg));
+ /* emit the movi before sampling pc for the branch displacement */
+ flush();
+ w = _jit->pc.w;
+ BNE(r0, rn(reg), ((i0 - w) >> 2) - 1);
+ delay(op);
jit_unget_reg(reg);
}
-
return (w);
}
static void
_jmpr(jit_state_t *_jit, jit_int32_t r0)
{
+ /* Jump through register r0, filling the delay slot with a pending
+  * instruction when one is available instead of a NOP. */
+ jit_int32_t op, t0;
+ /* make sure delay slot does not use r0 */
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+ r0, _ZERO_REGNO);
+ op = pending();
JR(r0);
- NOP(1);
+ delay(op);
+ /* the request was optional (jit_class_chk), so it may be JIT_NOREG */
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
}
static jit_word_t
-_jmpi(jit_state_t *_jit, jit_word_t i0)
-{
- jit_word_t w;
- jit_int32_t reg;
-
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch)
+{
+ /* Unconditional jump to i0; returns the address to patch later (the
+  * branch/jump itself, or the movi_p sequence on the patchable
+  * register path).  Tried in order: a pc-relative BEQ $0,$0 (mips2
+  * and either `patch` set or a short displacement), a J when i0 and
+  * the delay slot share the same upper 4 address bits, and finally a
+  * load of i0 into t0 followed by JR. */
+ jit_int32_t op, t0;
+ jit_word_t w, disp;
+ /* try to get a pending instruction before the jump */
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr, _ZERO_REGNO, _ZERO_REGNO);
+ op = pending();
+ /* implicit flush() */
w = _jit->pc.w;
- if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
- J((i0 & ~0xf0000000) >> 2);
- NOP(1);
+ if (jit_mips2_p()) {
+ disp = ((i0 - w) >> 2) - 1;
+ if (patch || can_sign_extend_short_p(disp)) {
+ BEQ(_ZERO_REGNO, _ZERO_REGNO, disp);
+ goto done;
+ }
}
+ if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000))
+ J((i0 & ~0xf0000000) >> 2);
else {
- reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- movi_p(rn(reg), i0);
- jmpr(rn(reg));
- jit_unget_reg(reg);
+ if (patch)
+ w = movi_p(rn(t0), i0);
+ else
+ movi(rn(t0), i0);
+ JR(rn(t0));
}
+done:
+ /* every path ends with the delay slot and the scratch release */
+ delay(op);
+ jit_unget_reg(t0);
+ return (w);
+}
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+ /* Patchable unconditional jump: load i0 into a scratch register with
+  * movi_p and jump through it.  Returns the address sampled just
+  * before the movi_p sequence, for later patching. */
+ jit_word_t w;
+ jit_int32_t op, t0;
+ /* t0 carries the jump target, so the allocation must not be optional:
+  * jit_class_chk is not passed here because callers of that mode have
+  * to cope with JIT_NOREG, while rn(t0) is used unconditionally
+  * below. */
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr,
+ _T9_REGNO, _ZERO_REGNO);
+ op = pending();
+ /* implicit flush() */
+ w = _jit->pc.w;
+ movi_p(rn(t0), i0);
+ flush(); /* movi_p will be patched */
+ JR(rn(t0));
+ delay(op);
+ jit_unget_reg(t0);
return (w);
}
addr(rn(t1), r0, r1); /* t1 = r0 + r1 */
SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */
SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */
- MOVZ(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */
+ movzr(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
addr(r0, r0, r1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
addiu(rn(t1), r0, i1);
SLT(rn(t2), r0, rn(t1));
SLT(rn(t1), rn(t1), r0);
- MOVZ(rn(t1), rn(t2), rn(t0));
+ movzr(rn(t1), rn(t2), rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
addiu(r0, r0, i1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
addr(rn(t0), r0, r1);
SLTU(rn(t1), rn(t0), r0);
+ flush();
+ /* cannot optimize delay slot */
w = _jit->pc.w;
BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
return (w);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
addiu(rn(t0), r0, i1);
SLTU(rn(t1), rn(t0), r0);
+ flush();
+ /* cannot optimize delay slot */
w = _jit->pc.w;
BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
}
addr(rn(t1), r0, r1); /* t1 = r0 + r1 */
SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */
SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */
- MOVZ(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */
+ movzr(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
addr(r0, r0, r1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
addiu(rn(t1), r0, i1);
SLT(rn(t2), r0, rn(t1));
SLT(rn(t1), rn(t1), r0);
- MOVZ(rn(t1), rn(t2), rn(t0));
+ movzr(rn(t1), rn(t2), rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
addiu(r0, r0, i1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
addr(rn(t0), r0, r1);
SLTU(rn(t1), rn(t0), r0);
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
return (w);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
addiu(rn(t0), r0, i1);
SLTU(rn(t1), rn(t0), r0);
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
}
subr(rn(t1), r0, r1); /* t1 = r0 - r1 */
SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */
SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */
- MOVZ(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */
+ movzr(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */
+ flush();
w = _jit->pc.w;
BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
subr(r0, r0, r1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
addiu(rn(t1), r0, -i1);
SLT(rn(t2), rn(t1), r0);
SLT(rn(t1), r0, rn(t1));
- MOVZ(rn(t1), rn(t2), rn(t0));
+ movzr(rn(t1), rn(t2), rn(t0));
+ flush();
w = _jit->pc.w;
BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
addiu(r0, r0, -i1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
subr(rn(t0), r0, r1);
SLTU(rn(t1), r0, rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
return (w);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
addiu(rn(t0), r0, -i1);
SLTU(rn(t1), r0, rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
}
subr(rn(t1), r0, r1); /* t1 = r0 - r1 */
SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */
SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */
- MOVZ(rn(t1), rn(t2), rn(t0)); /* if (t0 == 0) t1 = t2 */
+ movzr(rn(t1), rn(t2), rn(t0)); /* if (t0 == 0) t1 = t2 */
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
subr(r0, r0, r1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
addiu(rn(t1), r0, -i1);
SLT(rn(t2), rn(t1), r0);
SLT(rn(t1), r0, rn(t1));
- MOVZ(rn(t1), rn(t2), rn(t0));
+ movzr(rn(t1), rn(t2), rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
/* delay slot */
addiu(r0, r0, -i1);
+ flush();
jit_unget_reg(t2);
jit_unget_reg(t1);
jit_unget_reg(t0);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
subr(rn(t0), r0, r1);
SLTU(rn(t1), r0, rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
return (w);
t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
addiu(rn(t0), r0, -i1);
SLTU(rn(t1), r0, rn(t0));
+ /* cannot optimize delay slot */
+ flush();
w = _jit->pc.w;
BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
/* delay slot */
movr(r0, rn(t0));
+ flush();
jit_unget_reg(t1);
jit_unget_reg(t0);
}
_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t w;
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+ /* Branch to i0 when (r0 & r1) != 0; returns the address of the
+  * branch.  The pending instruction captured before the AND fills the
+  * delay slot in place of the former NOP. */
+ jit_int32_t op, t0;
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+ op = pending();
AND(rn(t0), r0, r1);
+ flush();
w = _jit->pc.w;
BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
- NOP(1);
+ delay(op);
jit_unget_reg(t0);
return (w);
}
_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
jit_word_t w;
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
-
+ /* Branch to i0 when (r0 & i1) != 0; returns the address of the
+  * branch.  The pending instruction captured before the andi fills the
+  * delay slot in place of the former NOP. */
+ jit_int32_t op, t0;
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+ op = pending();
andi(rn(t0), r0, i1);
+ flush();
w = _jit->pc.w;
BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
- NOP(1);
-
+ delay(op);
jit_unget_reg(t0);
return (w);
}
_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t w;
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+ /* Branch to i0 when (r0 & r1) == 0; returns the address of the
+  * branch.  The pending instruction captured before the AND fills the
+  * delay slot in place of the former NOP. */
+ jit_int32_t op, t0;
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+ op = pending();
AND(rn(t0), r0, r1);
+ flush();
w = _jit->pc.w;
BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
- NOP(1);
+ delay(op);
jit_unget_reg(t0);
return (w);
}
_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
jit_word_t w;
- jit_int32_t t0;
- t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
-
+ /* Branch to i0 when (r0 & i1) == 0; returns the address of the
+  * branch.  The pending instruction captured before the andi fills the
+  * delay slot in place of the former NOP. */
+ jit_int32_t op, t0;
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+ op = pending();
andi(rn(t0), r0, i1);
+ flush();
w = _jit->pc.w;
BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
- NOP(1);
-
+ delay(op);
jit_unget_reg(t0);
return (w);
}
static void
_callr(jit_state_t *_jit, jit_int32_t r0)
{
- JALR(r0);
- if (r0 != _T9_REGNO)
- movr(_T9_REGNO, r0);
- else
- NOP(1);
+ /* Call through register r0.  When r0 is not _T9_REGNO the delay slot
+  * is already taken by the copy of r0 into _T9_REGNO; otherwise it is
+  * offered to a pending instruction. */
+ jit_int32_t op, t0;
+ if (r0 != _T9_REGNO) {
+ JALR(r0);
+ /* delay slot */
+ movr(_T9_REGNO, r0);
+ flush();
+ }
+ else {
+ /* make sure delay slot does not use r0 */
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+ r0, _ZERO_REGNO);
+ op = pending();
+ JALR(r0);
+ delay(op);
+ /* the request was optional (jit_class_chk), so it may be JIT_NOREG */
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
+ }
}
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch)
{
- if (((_jit->pc.w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
- if (can_sign_extend_short_p(i0)) {
- JAL((i0 & ~0xf0000000) >> 2);
- addiu(_T9_REGNO, _ZERO_REGNO, i0);
- return;
- }
-
- if (can_zero_extend_short_p(i0)) {
- JAL((i0 & ~0xf0000000) >> 2);
- ORI(_T9_REGNO, _ZERO_REGNO, i0);
- return;
+ /* Call the absolute address i0 and return the pc sampled on entry.
+  * Tried in order: a pc-relative BGEZAL $0 (mips2 and either `patch`
+  * set or a short displacement), a JAL whose delay slot finishes
+  * loading i0 into _T9_REGNO, and finally movi + JALR through
+  * _T9_REGNO.  Only the first form supports `patch` (see the assert). */
+ jit_int32_t op, t0;
+ jit_word_t w, disp;
+ /* NOTE(review): w is sampled before pending(), while the sibling
+  * emitters read pc only after it ("implicit flush()") -- confirm the
+  * BGEZAL displacement is still right when pending() flushes. */
+ w = _jit->pc.w;
+ if (jit_mips2_p()) {
+ disp = ((i0 - w) >> 2) - 1;
+ if (patch || can_sign_extend_short_p(disp)) {
+ op = pending();
+ BGEZAL(_ZERO_REGNO, disp); /* Renamed to BAL in mips release 6 */
+ delay(op);
+ goto done;
+ }
+ }
+ assert(!patch);
+ flush();
+ if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+ if (can_sign_extend_short_p(i0)) {
+ JAL((i0 & ~0xf0000000) >> 2);
+ /* delay slot */
+ addiu(_T9_REGNO, _ZERO_REGNO, i0);
+ }
+ else if (can_zero_extend_short_p(i0)) {
+ JAL((i0 & ~0xf0000000) >> 2);
+ /* delay slot */
+ ORI(_T9_REGNO, _ZERO_REGNO, i0);
}
-
- if (can_sign_extend_int_p(i0)) {
- if (i0 & 0xffff) {
- LUI(_T9_REGNO, i0 >> 16);
- JAL((i0 & ~0xf0000000) >> 2);
- ORI(_T9_REGNO, _T9_REGNO, i0);
- } else {
- JAL((i0 & ~0xf0000000) >> 2);
- LUI(_T9_REGNO, i0 >> 16);
+ else if (can_sign_extend_int_p(i0)) {
+ if (i0 & 0xffff) {
+ LUI(_T9_REGNO, i0 >> 16);
+ JAL((i0 & ~0xf0000000) >> 2);
+ /* delay slot */
+ ORI(_T9_REGNO, _T9_REGNO, i0);
}
- return;
+ else {
+ JAL((i0 & ~0xf0000000) >> 2);
+ /* delay slot */
+ LUI(_T9_REGNO, i0 >> 16);
+ }
}
+ else
+ goto fallback;
}
-
- movi(_T9_REGNO, i0);
- JALR(_T9_REGNO);
- NOP(1);
+ else {
+ fallback:
+ /* make sure delay slot does not use _T9_REGNO */
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+ _T9_REGNO, _ZERO_REGNO);
+ /* try to get an instruction before the call */
+ op = pending();
+ movi(_T9_REGNO, i0);
+ JALR(_T9_REGNO);
+ delay(op);
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
+ }
+ done:
+ return (w);
}
static jit_word_t
_calli_p(jit_state_t *_jit, jit_word_t i0)
{
jit_word_t word;
-
+ /* Patchable call: load i0 into _T9_REGNO with movi_p and call through
+  * it.  Returns the address sampled just before the movi_p sequence,
+  * for later patching.  The scratch request is optional
+  * (jit_class_chk) and the register itself is never used here. */
+ jit_int32_t op, t0;
+ /* make sure delay slot does not use _T9_REGNO */
+ t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+ _T9_REGNO, _ZERO_REGNO);
+ op = pending();
+ /* implicit flush() */
word = _jit->pc.w;
movi_p(_T9_REGNO, i0);
JALR(_T9_REGNO);
- NOP(1);
-
+ delay(op);
+ if (t0 != JIT_NOREG)
+ jit_unget_reg(t0);
return (word);
}
-static jit_int32_t fregs[] = {
- _F30, _F28, _F26, _F24, _F22, _F20,
-#if !NEW_ABI
- _F18, _F16,
-#endif
-};
-
-static jit_int32_t iregs[] = {
- _S7, _S6, _S5, _S4, _S3, _S2, _S1, _S0,
-};
-
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t index;
- jit_int32_t offset;
+ jit_int32_t reg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
/* align stack at 8 bytes */
_jitc->function->self.aoff) + 7) & -8;
#endif
- /* callee save registers */
+
#if NEW_ABI
- if ((_jitc->function->self.call & jit_call_varargs) &&
- jit_arg_reg_p(_jitc->function->vagp))
- subi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
- else
+ if (_jitc->function->stack)
+ _jitc->function->need_stack = 1;
+ if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ _jitc->function->need_stack = 1;
+ break;
+ }
+ }
+#else
+ /* Need always a frame due to the need to always allocate 16 bytes */
+ jit_check_frame();
#endif
- subi(_SP_REGNO, _SP_REGNO, stack_framesize);
- offset = stack_framesize - (sizeof(jit_word_t) << 1);
- for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
- if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
- stxi_d(offset, _SP_REGNO, rn(fregs[index]));
- }
- for (index = 0; index < jit_size(iregs);
- index++, offset -= sizeof(jit_word_t)) {
- if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
- stxi(offset, _SP_REGNO, rn(iregs[index]));
- }
- assert(offset >= sizeof(jit_word_t));
- stxi(offset, _SP_REGNO, _RA_REGNO);
- stxi(0, _SP_REGNO, _BP_REGNO);
- movr(_BP_REGNO, _SP_REGNO);
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+ if (_jitc->function->need_frame) {
+ stxi(0, _SP_REGNO, _RA_REGNO);
+ stxi(STACK_SLOT, _SP_REGNO, _BP_REGNO);
+ }
+ /* callee save registers */
+ for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ stxi(offs, _SP_REGNO, rn(iregs[reg]));
+ offs += STACK_SLOT;
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame)
+ movr(_BP_REGNO, _SP_REGNO);
/* alloca */
if (_jitc->function->stack)
subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
- index = jit_get_reg(jit_class_gpr);
- movi(rn(index), _jitc->function->self.aoff);
- stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index));
- jit_unget_reg(index);
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), _jitc->function->self.aoff);
+ stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(reg));
+ jit_unget_reg(reg);
}
if (_jitc->function->self.call & jit_call_varargs) {
+ for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) {
+ offs = jit_framesize() - ((NUM_WORD_ARGS - reg) * STACK_SLOT);
#if NEW_ABI
- index = _jitc->function->vagp;
+ SD(rn(_A0 - reg), offs, _BP_REGNO);
#else
- index = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
-#endif
- offset = stack_framesize + index * STACK_SLOT;
- for (; jit_arg_reg_p(index); ++index, offset += STACK_SLOT) {
-#if NEW_ABI
- SD(rn(_A0 - index), offset, _BP_REGNO);
-#else
- stxi(offset + WORD_ADJUST, _BP_REGNO, rn(_A0 - index));
+ offs += 16 + WORD_ADJUST;
+ stxi(offs, _BP_REGNO, rn(_A0 - reg));
#endif
}
}
/*
 * Emit the function epilogue.  Nothing is emitted when the function only
 * assumes a frame.  Otherwise: restore SP, RA and BP when a frame was set
 * up, reload the callee save registers from the slots written by
 * _prolog(), and return with JR, releasing the frame (or emitting a NOP)
 * in the delay slot.
 */
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t index;
- jit_int32_t offset;
+ jit_int32_t reg, offs;
if (_jitc->function->assume_frame)
return;
+
+ if (_jitc->function->need_frame) {
+ movr(_SP_REGNO, _BP_REGNO);
+ ldxi(_RA_REGNO, _SP_REGNO, 0);
+ ldxi(_BP_REGNO, _SP_REGNO, STACK_SLOT);
+ }
+
/* callee save registers */
- movr(_SP_REGNO, _BP_REGNO);
- offset = stack_framesize - (sizeof(jit_word_t) << 1);
- for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
- if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
- ldxi_d(rn(fregs[index]), _SP_REGNO, offset);
- }
- for (index = 0; index < jit_size(iregs);
- index++, offset -= sizeof(jit_word_t)) {
- if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
- ldxi(rn(iregs[index]), _SP_REGNO, offset);
- }
- assert(offset >= sizeof(jit_word_t));
- ldxi(_RA_REGNO, _SP_REGNO, offset);
- ldxi(_BP_REGNO, _SP_REGNO, 0);
+ /* The first two STACK_SLOTs hold RA and BP; saved registers follow. */
+ for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+ /* Advance by STACK_SLOT, exactly as _prolog() does when saving;
+  * sizeof(jit_word_t) would desynchronize the offsets whenever
+  * the two sizes differ. */
+ offs += STACK_SLOT;
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
JR(_RA_REGNO);
/* delay slot */
-#if NEW_ABI
- if ((_jitc->function->self.call & jit_call_varargs) &&
- jit_arg_reg_p(_jitc->function->vagp))
- addi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ addi(_SP_REGNO, _SP_REGNO, jit_framesize());
else
-#endif
- addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+ NOP(1);
+ flush();
}
/*
 * Set r0 to the va_list start address, computed as an offset from
 * _BP_REGNO.  The function must have been flagged jit_call_varargs
 * (asserted).
 */
static void
_vastart(jit_state_t *_jit, jit_int32_t r0)
{
assert(_jitc->function->self.call & jit_call_varargs);
- /* Initialize va_list to the first stack argument. */
#if NEW_ABI
+ /* Initialize va_list to the first stack argument. */
if (jit_arg_reg_p(_jitc->function->vagp))
- addi(r0, _BP_REGNO, stack_framesize + _jitc->function->vagp *
- sizeof(jit_int64_t));
+ addi(r0, _BP_REGNO,
+ jit_framesize() -
+ ((NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT)); /* same offset _prolog() stores argument register vagp at */
else
#endif
- addi(r0, _BP_REGNO, _jitc->function->self.size);
+ addi(r0, _BP_REGNO, jit_selfsize());
}
static void
break;
case MIPS_COP1: case MIPS_COP2:
- assert(i.rs.b == MIPS_BC);
- switch (i.rt.b) {
- case MIPS_BCF: case MIPS_BCFL:
- case MIPS_BCT: case MIPS_BCTL:
- i.is.b = ((label - instr) >> 2) - 1;
- u.i[0] = i.op;
- break;
- default:
- assert(!"unhandled branch opcode");
- break;
+ if (jit_mips6_p()) {
+ switch (i.rs.b) {
+ case MIPS_BC1EQZ: case MIPS_BC1NEZ:
+ assert(jit_mips6_p());
+ i.is.b = ((label - instr) >> 2) - 1;
+ u.i[0] = i.op;
+ break;
+ default:
+ assert(!"unhandled branch opcode");
+ break;
+ }
+ }
+ else {
+ assert(i.rs.b == MIPS_BC);
+ switch (i.rt.b) {
+ case MIPS_BCF: case MIPS_BCFL:
+ case MIPS_BCT: case MIPS_BCTL:
+ assert(!jit_mips6_p());
+ i.is.b = ((label - instr) >> 2) - 1;
+ u.i[0] = i.op;
+ break;
+ default:
+ assert(!"unhandled branch opcode");
+ break;
+ }
}
break;
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define MIPS_fmt_PS 0x16 /* 2 x float32 */
# define MIPS_fmt_S_PU 0x20
# define MIPS_fmt_S_PL 0x26
+# define MIPS_condn_S 0x14 /* release 6 */
+# define MIPS_condn_D 0x15 /* release 6 */
# define MIPS_ADD_fmt 0x00
# define MIPS_LWXC1 0x00
# define MIPS_SUB_fmt 0x01
# define MIPS_cond_NGE 0x3d
# define MIPS_cond_LE 0x3e
# define MIPS_cond_UGT 0x3f
+/* Mips release 6 */
+# define MIPS_cmp_AF 0x00
+# define MIPS_cmp_UN 0x01
+# define MIPS_cmp_EQ 0x02
+# define MIPS_cmp_UEQ 0x03
+# define MIPS_cmp_LT 0x04
+# define MIPS_cmp_ULT 0x05
+# define MIPS_cmp_LE 0x06
+# define MIPS_cmp_ULE 0x07
+# define MIPS_cmp_SAF 0x08
+# define MIPS_cmp_SUN 0x09
+# define MIPS_cmp_SEQ 0x0a
+# define MIPS_cmp_SUEQ 0x0b
+# define MIPS_cmp_SLT 0x0c
+# define MIPS_cmp_SULT 0x0d
+# define MIPS_cmp_SLE 0x0e
+# define MIPS_cmp_SULE 0x0f
# define ADD_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt)
# define ADD_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt)
# define SUB_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt)
# define SQRT_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt)
# define SQRT_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt)
# define MFC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0)
+# define MFHC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MFH,rt,fs,0,0)
# define MTC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0)
+# define MTHC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MTH,rt,fs,0,0)
# define DMFC1(rt, fs) hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0)
# define DMTC1(rt, fs) hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0)
# define CVT_D_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D)
# define MOV_S(fd, fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt)
# define MOV_D(fd, fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt)
# define BC1F(im) hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im)
+# define BC1EQZ(ft,im) hrri(MIPS_COP1,MIPS_BC1EQZ,ft,im)
# define BC1T(im) hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im)
+# define BC1NEZ(ft,im) hrri(MIPS_COP1,MIPS_BC1NEZ,ft,im)
# define C_F_S(fs,ft) c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F)
# define C_F_D(fs,ft) c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F)
# define C_F_PS(fs,ft) c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F)
static void
_c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
jit_int32_t ft, jit_int32_t fs, jit_int32_t cc);
+# define CMP_AF_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_AF)
+# define CMP_AF_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_AF)
+# define CMP_UN_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UN)
+# define CMP_UN_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UN)
+# define CMP_EQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_EQ)
+# define CMP_EQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_EQ)
+# define CMP_UEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UEQ)
+# define CMP_UEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UEQ)
+# define CMP_LT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LT)
+# define CMP_LT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LT)
+# define CMP_ULT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULT)
+# define CMP_ULT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULT)
+# define CMP_LE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LE)
+# define CMP_LE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LE)
+# define CMP_ULE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULE)
+# define CMP_ULE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULE)
+# define CMP_SAF_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SAF)
+# define CMP_SAF_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SAF)
+# define CMP_SUN_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUN)
+# define CMP_SUN_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUN)
+# define CMP_SEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SEQ)
+# define CMP_SEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SEQ)
+# define CMP_SUEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUEQ)
+# define CMP_SUEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUEQ)
+# define CMP_SLT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLT)
+# define CMP_SLT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLT)
+# define CMP_SULT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULT)
+# define CMP_SULT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULT)
+# define CMP_SLE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLE)
+# define CMP_SLE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLE)
+# define CMP_SULE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULE)
+# define CMP_SULE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULE)
+# define cmp_cond_fmt(fm,fd,ft,fs,cn) _cmp_cond_fmt(_jit,fm,fd,ft,fs,cn)
+static void
+_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd,
+ jit_int32_t ft, jit_int32_t fs, jit_int32_t cn);
# define addr_f(r0,r1,r2) ADD_S(r0,r1,r2)
# define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0)
static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
# define sqrtr_f(r0,r1) SQRT_S(r0,r1)
# define sqrtr_d(r0,r1) SQRT_D(r0,r1)
# define movr_w_f(r0, r1) MTC1(r1, r0)
-# define movr_f_w(r0, r1) MFC1(r1, r0)
+# define movr_f_w(r0, r1) MFC1(r0, r1)
# define movi_f_w(r0, i0) _movi_f_w(_jit, r0, i0)
static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*);
# define extr_f(r0, r1) _extr_f(_jit, r0, r1)
i.ft.b = ft;
i.fm.b = fm;
i.hc.b = MIPS_COP1;
- ii(i.op);
+ instr(i.op);
+}
+
+static void
+_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd,
+ jit_int32_t ft, jit_int32_t fs, jit_int32_t cn)
+{ /* Assemble a MIPS_COP1 instruction word from the given fields and emit
+   * it with instr().  As used by the CMP_* macros: fm is MIPS_condn_S or
+   * MIPS_condn_D, fd receives the comparison result, ft/fs are the
+   * operands and cn is one of the MIPS_cmp_* condition codes. */
+ jit_instr_t i;
+ i.op = 0; /* must have bit 6 zeroed */
+ i.cn.b = cn;
+ i.ft.b = ft;
+ i.fs.b = fs;
+ i.fd.b = fd;
+ i.fm.b = fm;
+ i.hc.b = MIPS_COP1;
+ instr(i.op);
}
# define fpr_opi(name, type, size) \
_movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
assert(r1 == r2 - 1);
- MTC1(r1, r0 + BE_P);
- MTC1(r2, r0 + LE_P);
+ if (jit_mips6_p()) {
+ MTC1(r1, r0);
+ MTHC1(r2, r0);
+ }
+ else {
+ MTC1(r1, r0 + BE_P);
+ MTC1(r2, r0 + LE_P);
+ }
}
/*
 * Copy the contents of floating point register r2 into the pair of
 * registers r0/r1, which must be consecutive (r0 == r1 - 1, asserted).
 * On release 6 both transfers read r2 itself (MFC1 then MFHC1); otherwise
 * two MFC1 transfers read r2 + BE_P and r2 + LE_P.
 */
static void
_movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
assert(r0 == r1 - 1);
- MFC1(r0, r2 + BE_P);
- MFC1(r1, r2 + LE_P);
+ if (jit_mips6_p()) {
+ MFC1(r0, r2);
+ MFHC1(r1, r2); /* MIPS_MFH encoding; presumably the upper half -- confirm */
+ }
+ else {
+ MFC1(r0, r2 + BE_P);
+ MFC1(r1, r2 + LE_P);
+ }
}
static void
/*
 * Load a double from the address held in r1 into floating point register
 * r0.  A single LDC1 is emitted when jit_mips6_p() holds, or on 64-bit or
 * NEW_ABI builds; otherwise two LWC1 loads fill r0 + BE_P from offset 0
 * and r0 + LE_P from offset 4.
 */
static void
_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
-# if __WORDSIZE == 64 || NEW_ABI
- LDC1(r0, 0, r1);
-# else
- LWC1(r0 + BE_P, 0, r1);
- LWC1(r0 + LE_P, 4, r1);
-# endif
+ if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) /* former #if, now an ordinary condition that also tests release 6 */
+ LDC1(r0, 0, r1);
+ else {
+ LWC1(r0 + BE_P, 0, r1);
+ LWC1(r0 + LE_P, 4, r1);
+ }
}
/*
 * Load a double from the absolute address i0 into floating point
 * register r0.
 *
 * Release 6, 64-bit or NEW_ABI: one LDC1, addressed off _ZERO_REGNO when
 * can_sign_extend_short_p(i0) holds, else through a temporary GPR that
 * receives i0.
 * Otherwise: two LWC1 loads off _ZERO_REGNO when both i0 and i0 + 4 pass
 * can_sign_extend_short_p(), else i0 is placed in a temporary GPR and the
 * load is delegated to ldr_d().
 */
static void
_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
-# if __WORDSIZE == 64 || NEW_ABI
- if (can_sign_extend_short_p(i0))
- LDC1(r0, i0, _ZERO_REGNO);
- else {
- reg = jit_get_reg(jit_class_gpr);
- movi(rn(reg), i0);
- LDC1(r0, 0, rn(reg));
- jit_unget_reg(reg);
- }
-# else
- if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
- LWC1(r0 + BE_P, i0, _ZERO_REGNO);
- LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+ if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+ if (can_sign_extend_short_p(i0))
+ LDC1(r0, i0, _ZERO_REGNO);
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ LDC1(r0, 0, rn(reg));
+ jit_unget_reg(reg);
+ }
}
else {
- reg = jit_get_reg(jit_class_gpr);
- movi(rn(reg), i0);
- LWC1(r0 + BE_P, 0, rn(reg));
- LWC1(r0 + LE_P, 4, rn(reg));
- jit_unget_reg(reg);
+ if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+ LWC1(r0 + BE_P, i0, _ZERO_REGNO);
+ LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ ldr_d(r0, rn(reg)); /* replaces the explicit LWC1 pair removed above */
+ jit_unget_reg(reg);
+ }
}
-# endif
}
/*
 * Load a double from address r1 + i0 into floating point register r0.
 *
 * Release 6, 64-bit or NEW_ABI: one LDC1 with displacement i0 when
 * can_sign_extend_short_p(i0) holds.
 * Otherwise: two LWC1 loads (displacements i0 and i0 + 4) when both pass
 * can_sign_extend_short_p().
 * In every remaining case the shared fallback computes r1 + i0 into a
 * temporary GPR and delegates to ldr_d().
 */
static void
_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
-# if __WORDSIZE == 64 || NEW_ABI
- if (can_sign_extend_short_p(i0))
- LDC1(r0, i0, r1);
-# else
- if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
- LWC1(r0 + BE_P, i0, r1);
- LWC1(r0 + LE_P, i0 + 4, r1);
+ if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+ if (can_sign_extend_short_p(i0))
+ LDC1(r0, i0, r1);
+ else
+ goto fallback; /* jumps into the else branch below */
}
-# endif
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r1, i0);
- ldr_d(r0, rn(reg));
- jit_unget_reg(reg);
+ if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+ LWC1(r0 + BE_P, i0, r1);
+ LWC1(r0 + LE_P, i0 + 4, r1);
+ }
+ else {
+ fallback: /* also reached by goto from the LDC1 branch above */
+ reg = jit_get_reg(jit_class_gpr);
+ addi(rn(reg), r1, i0);
+ ldr_d(r0, rn(reg));
+ jit_unget_reg(reg);
+ }
}
}
/*
 * Store the double in floating point register r1 at the address held in
 * r0.  A single SDC1 is emitted when jit_mips6_p() holds, or on 64-bit or
 * NEW_ABI builds; otherwise two SWC1 stores write r1 + BE_P at offset 0
 * and r1 + LE_P at offset 4.
 */
static void
_str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1)
{
-# if __WORDSIZE == 64 || NEW_ABI
- SDC1(r1, 0, r0);
-# else
- SWC1(r1 + BE_P, 0, r0);
- SWC1(r1 + LE_P, 4, r0);
-# endif
+ if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) /* former #if, now an ordinary condition that also tests release 6 */
+ SDC1(r1, 0, r0);
+ else {
+ SWC1(r1 + BE_P, 0, r0);
+ SWC1(r1 + LE_P, 4, r0);
+ }
}
/*
 * Store the double in floating point register r0 at the absolute address
 * i0.
 *
 * Release 6, 64-bit or NEW_ABI: one SDC1 off _ZERO_REGNO when
 * can_sign_extend_short_p(i0) holds.
 * Otherwise: two SWC1 stores off _ZERO_REGNO (displacements i0 and
 * i0 + 4) when both pass can_sign_extend_short_p().
 * In every remaining case the shared fallback places i0 in a temporary
 * GPR and delegates to str_d().
 */
static void
_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
-# if __WORDSIZE == 64 || NEW_ABI
- if (can_sign_extend_short_p(i0))
- SDC1(r0, i0, _ZERO_REGNO);
-# else
- if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
- SWC1(r0 + BE_P, i0, _ZERO_REGNO);
- SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+ if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+ if (can_sign_extend_short_p(i0))
+ SDC1(r0, i0, _ZERO_REGNO);
+ else
+ goto fallback; /* jumps into the else branch below */
}
-# endif
else {
- reg = jit_get_reg(jit_class_gpr);
- movi(rn(reg), i0);
- str_d(rn(reg), r0);
- jit_unget_reg(reg);
+ if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+ SWC1(r0 + BE_P, i0, _ZERO_REGNO);
+ SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+ }
+ else {
+ fallback: /* also reached by goto from the SDC1 branch above */
+ reg = jit_get_reg(jit_class_gpr);
+ movi(rn(reg), i0);
+ str_d(rn(reg), r0);
+ jit_unget_reg(reg);
+ }
}
}
_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_int32_t reg;
-# if __WORDSIZE == 64 || NEW_ABI
- if (can_sign_extend_short_p(i0))
- SDC1(r1, i0, r0);
-# else
- if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
- SWC1(r1 + BE_P, i0, r0);
- SWC1(r1 + LE_P, i0 + 4, r0);
+ if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+ if (can_sign_extend_short_p(i0))
+ SDC1(r1, i0, r0);
+ else
+ goto fallback;
}
-# endif
else {
- reg = jit_get_reg(jit_class_gpr);
- addi(rn(reg), r0, i0);
- str_d(rn(reg), r1);
- jit_unget_reg(reg);
+ if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+ SWC1(r1 + BE_P, i0, r0);
+ SWC1(r1 + LE_P, i0 + 4, r0);
+ }
+ else {
+ fallback:
+ reg = jit_get_reg(jit_class_gpr);
+ addi(rn(reg), r0, i0);
+ str_d(rn(reg), r1);
+ jit_unget_reg(reg);
+ }
}
}
else
DMTC1(_ZERO_REGNO, r0);
# else
- if (_jitc->no_data)
- reg = jit_get_reg(jit_class_gpr);
- if (data.i[0]) {
+ if (jit_mips6_p()) {
if (_jitc->no_data) {
- movi(rn(reg), data.i[0]);
- MTC1(rn(reg), r0 + BE_P);
+ reg = jit_get_reg(jit_class_gpr);
+# if __WORDSIZE == 64
+ movi(rn(reg), data.l);
+ DMTC1(rn(reg), r0);
+# else
+ movi(rn(reg), data.i[0 + BE_P]);
+ MTC1(rn(reg), r0);
+ movi(rn(reg), data.i[0 + LE_P]);
+ MTHC1(rn(reg), r0);
+# endif
+ jit_unget_reg(reg);
}
else
- ldi_f(r0 + BE_P, (jit_word_t)i0);
+ ldi_d(r0, (jit_word_t)i0);
}
- else
- MTC1(_ZERO_REGNO, r0 + BE_P);
- if (data.i[1]) {
- if (_jitc->no_data) {
- movi(rn(reg), data.i[1]);
- MTC1(rn(reg), r0 + LE_P);
+ else {
+ if (_jitc->no_data)
+ reg = jit_get_reg(jit_class_gpr);
+ if (data.i[0]) {
+ if (_jitc->no_data) {
+ movi(rn(reg), data.i[0]);
+ MTC1(rn(reg), r0 + BE_P);
+ }
+ else
+ ldi_f(r0 + BE_P, (jit_word_t)i0);
}
else
- ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+ MTC1(_ZERO_REGNO, r0 + BE_P);
+ if (data.i[1]) {
+ if (_jitc->no_data) {
+ movi(rn(reg), data.i[1]);
+ MTC1(rn(reg), r0 + LE_P);
+ }
+ else
+ ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+ }
+ else
+ MTC1(_ZERO_REGNO, r0 + LE_P);
+ if (_jitc->no_data)
+ jit_unget_reg(reg);
}
- else
- MTC1(_ZERO_REGNO, r0 + LE_P);
- if (_jitc->no_data)
- jit_unget_reg(reg);
# endif
}
_ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LT_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_OLT_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(lt)
_ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LE_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_OLE_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(le)
_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_EQ_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_EQ_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(eq)
_ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULT_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_ULT_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(ge)
_gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULE_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_ULE_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(gt)
_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_EQ_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_EQ_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(ne)
_unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULT_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_ULT_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(unlt)
_unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULE_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_ULE_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(unle)
_uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UEQ_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_UEQ_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(uneq)
_unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LT_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_OLT_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(unge)
_ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LE_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_OLE_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(ungt)
_ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UEQ_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_UEQ_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(ltgt)
_ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_S(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UN_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_UN_S(r1, r2);
+ flush();
+ /* cannot optimize delay slot */
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(ord)
_unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_S(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UN_S(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_UN_S(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
fopi(unord)
_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LT_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLT_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(lt)
_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LE_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLE_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(le)
_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_EQ_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_EQ_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(eq)
_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULT_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULT_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(ge)
_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULE_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULE_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(gt)
_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_EQ_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_EQ_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(ne)
_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULT_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULT_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(unlt)
_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULE_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULE_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(unle)
_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UEQ_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UEQ_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(uneq)
_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LT_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLT_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(unge)
_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LE_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLE_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(ungt)
_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UEQ_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UEQ_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(ltgt)
_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_S(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UN_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UN_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(ord)
_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_S(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UN_S(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UN_S(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
fbopi(unord)
_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LT_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_OLT_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(lt)
_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LE_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_OLE_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(le)
_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_EQ_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_EQ_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(eq)
_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULT_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_ULT_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(ge)
_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULE_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_ULE_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(gt)
_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_EQ_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_EQ_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(ne)
_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULT_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_ULT_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(unlt)
_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_ULE_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_ULE_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(unle)
_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UEQ_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_UEQ_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(uneq)
_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LT_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_OLT_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(unge)
_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_LE_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_OLE_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(ungt)
_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UEQ_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_UEQ_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(ltgt)
_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_D(r1, r2);
- w = _jit->pc.w;
- BC1F(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UN_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ addi(r0, r0, 1);
+ }
+ else {
+ C_UN_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1F(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(ord)
_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_D(r1, r2);
- w = _jit->pc.w;
- BC1T(0);
- /* delay slot */
- movi(r0, 1);
- movi(r0, 0);
- patch_at(w, _jit->pc.w);
+ jit_int32_t reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg(jit_class_fpr);
+ CMP_UN_D(rn(reg), r1, r2);
+ MFC1(r0, rn(reg));
+ jit_unget_reg(reg);
+ andi(r0, r0, 1);
+ }
+ else {
+ C_UN_D(r1, r2);
+ /* cannot optimize delay slot */
+ flush();
+ w = _jit->pc.w;
+ BC1T(0);
+ /* delay slot */
+ movi(r0, 1);
+ movi(r0, 0);
+ flush();
+ patch_at(w, _jit->pc.w);
+ }
}
dopi(unord)
_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LT_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLT_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(lt)
_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LE_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLE_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(le)
_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_EQ_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_EQ_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(eq)
_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULT_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULT_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(ge)
_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULE_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULE_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(gt)
_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_EQ_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_EQ_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_EQ_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(ne)
_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULT_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULT_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULT_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(unlt)
_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_ULE_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_ULE_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_ULE_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(unle)
_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UEQ_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UEQ_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(uneq)
_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLT_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LT_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLT_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(unge)
_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_OLE_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_LE_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_OLE_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(ungt)
_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UEQ_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UEQ_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UEQ_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(ltgt)
_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_D(r1, r2);
- w = _jit->pc.w;
- BC1F(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UN_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UN_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1F(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(ord)
_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
{
jit_word_t w;
- C_UN_D(r1, r2);
- w = _jit->pc.w;
- BC1T(((i0 - w) >> 2) - 1);
- NOP(1);
+ jit_int32_t op, reg;
+ if (jit_mips6_p()) {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+ op = pending();
+ CMP_UN_D(rn(reg), r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+ }
+ else {
+ reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+ op = pending();
+ C_UN_D(r1, r2);
+ flush();
+ w = _jit->pc.w;
+ BC1T(((i0 - w) >> 2) - 1);
+ }
+ delay(op);
+ jit_unget_reg(reg);
return (w);
}
dbopi(unord)
#if __WORDSIZE == 32
-#if NEW_ABI
-#define JIT_INSTR_MAX 52
- 0, /* data */
- 0, /* live */
- 0, /* align */
- 0, /* save */
- 0, /* load */
- 0, /* #name */
- 0, /* #note */
- 0, /* label */
- 44, /* prolog */
- 0, /* ellipsis */
- 0, /* va_push */
- 0, /* allocai */
- 0, /* allocar */
- 0, /* arg */
- 0, /* getarg_c */
- 0, /* getarg_uc */
- 0, /* getarg_s */
- 0, /* getarg_us */
- 0, /* getarg_i */
- 0, /* getarg_ui */
- 0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
- 0, /* va_start */
- 0, /* va_arg */
- 0, /* va_arg_d */
- 0, /* va_end */
- 4, /* addr */
- 12, /* addi */
- 12, /* addcr */
- 20, /* addci */
- 28, /* addxr */
- 28, /* addxi */
- 4, /* subr */
- 12, /* subi */
- 12, /* subcr */
- 20, /* subci */
- 28, /* subxr */
- 28, /* subxi */
- 16, /* rsbi */
- 4, /* mulr */
- 12, /* muli */
- 12, /* qmulr */
- 20, /* qmuli */
- 12, /* qmulr_u */
- 20, /* qmuli_u */
- 8, /* divr */
- 16, /* divi */
- 8, /* divr_u */
- 16, /* divi_u */
- 12, /* qdivr */
- 16, /* qdivi */
- 12, /* qdivr_u */
- 16, /* qdivi_u */
- 8, /* remr */
- 16, /* remi */
- 8, /* remr_u */
- 16, /* remi_u */
- 4, /* andr */
- 12, /* andi */
- 4, /* orr */
- 12, /* ori */
- 4, /* xorr */
- 12, /* xori */
- 4, /* lshr */
- 4, /* lshi */
- 4, /* rshr */
- 4, /* rshi */
- 4, /* rshr_u */
- 4, /* rshi_u */
- 4, /* negr */
- 8, /* comr */
- 4, /* ltr */
- 4, /* lti */
- 4, /* ltr_u */
- 4, /* lti_u */
- 8, /* ler */
- 12, /* lei */
- 8, /* ler_u */
- 12, /* lei_u */
- 12, /* eqr */
- 12, /* eqi */
- 8, /* ger */
- 12, /* gei */
- 8, /* ger_u */
- 12, /* gei_u */
- 4, /* gtr */
- 8, /* gti */
- 4, /* gtr_u */
- 8, /* gti_u */
- 8, /* ner */
- 8, /* nei */
- 4, /* movr */
- 8, /* movi */
- 4, /* movnr */
- 4, /* movzr */
- 8, /* extr_c */
- 4, /* extr_uc */
- 8, /* extr_s */
- 4, /* extr_us */
- 0, /* extr_i */
- 0, /* extr_ui */
- 4, /* htonr_us */
- 4, /* htonr_ui */
- 0, /* htonr_ul */
- 4, /* ldr_c */
- 12, /* ldi_c */
- 4, /* ldr_uc */
- 12, /* ldi_uc */
- 4, /* ldr_s */
- 12, /* ldi_s */
- 4, /* ldr_us */
- 12, /* ldi_us */
- 4, /* ldr_i */
- 12, /* ldi_i */
- 0, /* ldr_ui */
- 0, /* ldi_ui */
- 0, /* ldr_l */
- 0, /* ldi_l */
- 8, /* ldxr_c */
- 4, /* ldxi_c */
- 8, /* ldxr_uc */
- 4, /* ldxi_uc */
- 8, /* ldxr_s */
- 4, /* ldxi_s */
- 8, /* ldxr_us */
- 4, /* ldxi_us */
- 8, /* ldxr_i */
- 4, /* ldxi_i */
- 0, /* ldxr_ui */
- 0, /* ldxi_ui */
- 0, /* ldxr_l */
- 0, /* ldxi_l */
- 4, /* str_c */
- 12, /* sti_c */
- 4, /* str_s */
- 12, /* sti_s */
- 4, /* str_i */
- 12, /* sti_i */
- 0, /* str_l */
- 0, /* sti_l */
- 8, /* stxr_c */
- 4, /* stxi_c */
- 8, /* stxr_s */
- 4, /* stxi_s */
- 8, /* stxr_i */
- 4, /* stxi_i */
- 0, /* stxr_l */
- 0, /* stxi_l */
- 12, /* bltr */
- 12, /* blti */
- 12, /* bltr_u */
- 12, /* blti_u */
- 12, /* bler */
- 16, /* blei */
- 12, /* bler_u */
- 16, /* blei_u */
- 8, /* beqr */
- 16, /* beqi */
- 12, /* bger */
- 12, /* bgei */
- 12, /* bger_u */
- 12, /* bgei_u */
- 12, /* bgtr */
- 16, /* bgti */
- 12, /* bgtr_u */
- 16, /* bgti_u */
- 8, /* bner */
- 16, /* bnei */
- 12, /* bmsr */
- 12, /* bmsi */
- 12, /* bmcr */
- 12, /* bmci */
- 28, /* boaddr */
- 28, /* boaddi */
- 16, /* boaddr_u */
- 20, /* boaddi_u */
- 28, /* bxaddr */
- 28, /* bxaddi */
- 16, /* bxaddr_u */
- 20, /* bxaddi_u */
- 28, /* bosubr */
- 28, /* bosubi */
- 16, /* bosubr_u */
- 20, /* bosubi_u */
- 28, /* bxsubr */
- 28, /* bxsubi */
- 16, /* bxsubr_u */
- 20, /* bxsubi_u */
- 0, /* jmpr */
- 8, /* jmpi */
- 12, /* callr */
- 16, /* calli */
- 0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
- 0, /* finishr */
- 0, /* finishi */
- 0, /* ret */
- 0, /* retr */
- 0, /* reti */
- 0, /* retval_c */
- 0, /* retval_uc */
- 0, /* retval_s */
- 0, /* retval_us */
- 0, /* retval_i */
- 0, /* retval_ui */
- 0, /* retval_l */
- 44, /* epilog */
- 0, /* arg_f */
- 0, /* getarg_f */
- 0, /* putargr_f */
- 0, /* putargi_f */
- 4, /* addr_f */
- 16, /* addi_f */
- 4, /* subr_f */
- 16, /* subi_f */
- 16, /* rsbi_f */
- 4, /* mulr_f */
- 16, /* muli_f */
- 4, /* divr_f */
- 16, /* divi_f */
- 4, /* negr_f */
- 4, /* absr_f */
- 4, /* sqrtr_f */
- 16, /* ltr_f */
- 28, /* lti_f */
- 16, /* ler_f */
- 28, /* lei_f */
- 16, /* eqr_f */
- 28, /* eqi_f */
- 16, /* ger_f */
- 28, /* gei_f */
- 16, /* gtr_f */
- 28, /* gti_f */
- 16, /* ner_f */
- 28, /* nei_f */
- 16, /* unltr_f */
- 28, /* unlti_f */
- 16, /* unler_f */
- 28, /* unlei_f */
- 16, /* uneqr_f */
- 28, /* uneqi_f */
- 16, /* unger_f */
- 28, /* ungei_f */
- 16, /* ungtr_f */
- 28, /* ungti_f */
- 16, /* ltgtr_f */
- 28, /* ltgti_f */
- 16, /* ordr_f */
- 28, /* ordi_f */
- 16, /* unordr_f */
- 28, /* unordi_f */
- 8, /* truncr_f_i */
- 0, /* truncr_f_l */
- 8, /* extr_f */
- 4, /* extr_d_f */
- 4, /* movr_f */
- 12, /* movi_f */
- 4, /* ldr_f */
- 12, /* ldi_f */
- 8, /* ldxr_f */
- 4, /* ldxi_f */
- 4, /* str_f */
- 12, /* sti_f */
- 8, /* stxr_f */
- 4, /* stxi_f */
- 12, /* bltr_f */
- 24, /* blti_f */
- 12, /* bler_f */
- 24, /* blei_f */
- 12, /* beqr_f */
- 24, /* beqi_f */
- 12, /* bger_f */
- 24, /* bgei_f */
- 12, /* bgtr_f */
- 24, /* bgti_f */
- 12, /* bner_f */
- 24, /* bnei_f */
- 12, /* bunltr_f */
- 24, /* bunlti_f */
- 12, /* bunler_f */
- 24, /* bunlei_f */
- 12, /* buneqr_f */
- 24, /* buneqi_f */
- 12, /* bunger_f */
- 24, /* bungei_f */
- 12, /* bungtr_f */
- 24, /* bungti_f */
- 12, /* bltgtr_f */
- 24, /* bltgti_f */
- 12, /* bordr_f */
- 24, /* bordi_f */
- 12, /* bunordr_f */
- 24, /* bunordi_f */
- 0, /* pushargr_f */
- 0, /* pushargi_f */
- 0, /* retr_f */
- 0, /* reti_f */
- 0, /* retval_f */
- 0, /* arg_d */
- 0, /* getarg_d */
- 0, /* putargr_d */
- 0, /* putargi_d */
- 4, /* addr_d */
- 16, /* addi_d */
- 4, /* subr_d */
- 16, /* subi_d */
- 16, /* rsbi_d */
- 4, /* mulr_d */
- 16, /* muli_d */
- 4, /* divr_d */
- 16, /* divi_d */
- 4, /* negr_d */
- 4, /* absr_d */
- 4, /* sqrtr_d */
- 16, /* ltr_d */
- 28, /* lti_d */
- 16, /* ler_d */
- 28, /* lei_d */
- 16, /* eqr_d */
- 28, /* eqi_d */
- 16, /* ger_d */
- 28, /* gei_d */
- 16, /* gtr_d */
- 28, /* gti_d */
- 16, /* ner_d */
- 28, /* nei_d */
- 16, /* unltr_d */
- 28, /* unlti_d */
- 16, /* unler_d */
- 28, /* unlei_d */
- 16, /* uneqr_d */
- 28, /* uneqi_d */
- 16, /* unger_d */
- 28, /* ungei_d */
- 16, /* ungtr_d */
- 28, /* ungti_d */
- 16, /* ltgtr_d */
- 28, /* ltgti_d */
- 16, /* ordr_d */
- 28, /* ordi_d */
- 16, /* unordr_d */
- 28, /* unordi_d */
- 8, /* truncr_d_i */
- 0, /* truncr_d_l */
- 8, /* extr_d */
- 4, /* extr_f_d */
- 4, /* movr_d */
- 12, /* movi_d */
- 4, /* ldr_d */
- 12, /* ldi_d */
- 8, /* ldxr_d */
- 4, /* ldxi_d */
- 4, /* str_d */
- 12, /* sti_d */
- 8, /* stxr_d */
- 4, /* stxi_d */
- 12, /* bltr_d */
- 24, /* blti_d */
- 12, /* bler_d */
- 24, /* blei_d */
- 12, /* beqr_d */
- 24, /* beqi_d */
- 12, /* bger_d */
- 24, /* bgei_d */
- 12, /* bgtr_d */
- 24, /* bgti_d */
- 12, /* bner_d */
- 24, /* bnei_d */
- 12, /* bunltr_d */
- 24, /* bunlti_d */
- 12, /* bunler_d */
- 24, /* bunlei_d */
- 12, /* buneqr_d */
- 24, /* buneqi_d */
- 12, /* bunger_d */
- 24, /* bungei_d */
- 12, /* bungtr_d */
- 24, /* bungti_d */
- 12, /* bltgtr_d */
- 24, /* bltgti_d */
- 12, /* bordr_d */
- 24, /* bordi_d */
- 12, /* bunordr_d */
- 24, /* bunordi_d */
- 0, /* pushargr_d */
- 0, /* pushargi_d */
- 0, /* retr_d */
- 0, /* reti_d */
- 0, /* retval_d */
- 0, /* movr_w_f */
- 0, /* movr_ww_d */
- 0, /* movr_w_d */
- 0, /* movr_f_w */
- 0, /* movi_f_w */
- 0, /* movr_d_ww */
- 0, /* movi_d_ww */
- 4, /* movr_d_w */
- 12, /* movi_d_w */
- 20, /* bswapr_us */
- 52, /* bswapr_ui */
- 0, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
-#endif /* NEW_ABI */
-#endif /* __WORDSIZE */
-
-#if __WORDSIZE == 32
-#if !NEW_ABI
#define JIT_INSTR_MAX 116
0, /* data */
0, /* live */
- 0, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
20, /* va_arg_d */
8, /* remr_u */
16, /* remi_u */
4, /* andr */
- 12, /* andi */
+ 8, /* andi */
4, /* orr */
12, /* ori */
4, /* xorr */
4, /* ltr_u */
4, /* lti_u */
8, /* ler */
- 12, /* lei */
+ 4, /* lei */
8, /* ler_u */
- 12, /* lei_u */
- 12, /* eqr */
- 12, /* eqi */
+ 4, /* lei_u */
+ 8, /* eqr */
+ 8, /* eqi */
8, /* ger */
- 12, /* gei */
+ 8, /* gei */
8, /* ger_u */
- 12, /* gei_u */
+ 8, /* gei_u */
4, /* gtr */
8, /* gti */
4, /* gtr_u */
8, /* movi */
4, /* movnr */
4, /* movzr */
- 8, /* extr_c */
+ 36, /* casr */
+ 44, /* casi */
+ 4, /* extr_c */
4, /* extr_uc */
- 8, /* extr_s */
+ 4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
- 20, /* htonr_us */
- 52, /* htonr_ui */
+ 8, /* bswapr_us */
+ 8, /* bswapr_ui */
+ 0, /* bswapr_ul */
+ 4, /* htonr_us */
+ 4, /* htonr_ui */
0, /* htonr_ul */
4, /* ldr_c */
12, /* ldi_c */
20, /* bxsubi_u */
8, /* jmpr */
8, /* jmpi */
- 12, /* callr */
+ 8, /* callr */
16, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
8, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 20, /* bswapr_us */
- 52, /* bswapr_ui */
- 0, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
-#endif /* NEW_ABI */
+ 8, /* clo */
+ 8, /* clz */
+ 76, /* cto */
+ 76, /* ctz */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 76
0, /* data */
0, /* live */
- 4, /* align */
+ 24, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
- 44, /* prolog */
+ 76, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
- 0, /* va_start */
- 0, /* va_arg */
- 0, /* va_arg_d */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
+ 4, /* va_start */
+ 8, /* va_arg */
+ 8, /* va_arg_d */
0, /* va_end */
4, /* addr */
28, /* addi */
36, /* subci */
28, /* subxr */
28, /* subxi */
- 32, /* rsbi */
+ 36, /* rsbi */
8, /* mulr */
32, /* muli */
12, /* qmulr */
8, /* remr_u */
32, /* remi_u */
4, /* andr */
- 28, /* andi */
+ 8, /* andi */
4, /* orr */
28, /* ori */
4, /* xorr */
4, /* ltr_u */
4, /* lti_u */
8, /* ler */
- 12, /* lei */
+ 4, /* lei */
8, /* ler_u */
- 12, /* lei_u */
- 12, /* eqr */
- 12, /* eqi */
+ 4, /* lei_u */
+ 8, /* eqr */
+ 8, /* eqi */
8, /* ger */
- 12, /* gei */
+ 8, /* gei */
8, /* ger_u */
- 12, /* gei_u */
+ 8, /* gei_u */
4, /* gtr */
8, /* gti */
4, /* gtr_u */
28, /* movi */
4, /* movnr */
4, /* movzr */
- 8, /* extr_c */
+ 36, /* casr */
+ 56, /* casi */
+ 4, /* extr_c */
4, /* extr_uc */
- 8, /* extr_s */
+ 4, /* extr_s */
4, /* extr_us */
4, /* extr_i */
- 8, /* extr_ui */
+ 4, /* extr_ui */
+ 8, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 44, /* bswapr_ul */
4, /* htonr_us */
4, /* htonr_ui */
4, /* htonr_ul */
4, /* ldr_c */
- 12, /* ldi_c */
+ 24, /* ldi_c */
4, /* ldr_uc */
- 12, /* ldi_uc */
+ 24, /* ldi_uc */
4, /* ldr_s */
- 12, /* ldi_s */
+ 24, /* ldi_s */
4, /* ldr_us */
- 12, /* ldi_us */
+ 24, /* ldi_us */
4, /* ldr_i */
- 12, /* ldi_i */
+ 24, /* ldi_i */
4, /* ldr_ui */
- 12, /* ldi_ui */
+ 24, /* ldi_ui */
4, /* ldr_l */
- 12, /* ldi_l */
+ 24, /* ldi_l */
8, /* ldxr_c */
- 4, /* ldxi_c */
+ 16, /* ldxi_c */
8, /* ldxr_uc */
- 4, /* ldxi_uc */
+ 16, /* ldxi_uc */
8, /* ldxr_s */
- 4, /* ldxi_s */
+ 16, /* ldxi_s */
8, /* ldxr_us */
- 4, /* ldxi_us */
+ 16, /* ldxi_us */
8, /* ldxr_i */
- 4, /* ldxi_i */
+ 16, /* ldxi_i */
8, /* ldxr_ui */
- 4, /* ldxi_ui */
+ 16, /* ldxi_ui */
8, /* ldxr_l */
- 4, /* ldxi_l */
+ 16, /* ldxi_l */
4, /* str_c */
- 12, /* sti_c */
+ 24, /* sti_c */
4, /* str_s */
- 12, /* sti_s */
+ 24, /* sti_s */
4, /* str_i */
- 12, /* sti_i */
+ 24, /* sti_i */
4, /* str_l */
- 12, /* sti_l */
+ 24, /* sti_l */
8, /* stxr_c */
- 4, /* stxi_c */
+ 16, /* stxi_c */
8, /* stxr_s */
- 4, /* stxi_s */
+ 16, /* stxi_s */
8, /* stxr_i */
- 4, /* stxi_i */
+ 16, /* stxi_i */
8, /* stxr_l */
- 4, /* stxi_l */
+ 16, /* stxi_l */
12, /* bltr */
12, /* blti */
12, /* bltr_u */
12, /* bgtr_u */
16, /* bgti_u */
8, /* bner */
- 32, /* bnei */
+ 28, /* bnei */
12, /* bmsr */
12, /* bmsi */
12, /* bmcr */
28, /* bxsubi */
16, /* bxsubr_u */
20, /* bxsubi_u */
- 0, /* jmpr */
+ 8, /* jmpr */
8, /* jmpi */
- 12, /* callr */
+ 8, /* callr */
32, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* retval_i */
0, /* retval_ui */
0, /* retval_l */
- 44, /* epilog */
+ 76, /* epilog */
0, /* arg_f */
0, /* getarg_f */
0, /* putargr_f */
0, /* putargi_f */
4, /* addr_f */
- 16, /* addi_f */
+ 28, /* addi_f */
4, /* subr_f */
- 16, /* subi_f */
- 16, /* rsbi_f */
+ 28, /* subi_f */
+ 28, /* rsbi_f */
4, /* mulr_f */
- 16, /* muli_f */
+ 28, /* muli_f */
4, /* divr_f */
- 16, /* divi_f */
+ 28, /* divi_f */
4, /* negr_f */
4, /* absr_f */
4, /* sqrtr_f */
16, /* ltr_f */
- 28, /* lti_f */
+ 40, /* lti_f */
16, /* ler_f */
- 28, /* lei_f */
+ 40, /* lei_f */
16, /* eqr_f */
- 28, /* eqi_f */
+ 40, /* eqi_f */
16, /* ger_f */
- 28, /* gei_f */
+ 40, /* gei_f */
16, /* gtr_f */
- 28, /* gti_f */
+ 40, /* gti_f */
16, /* ner_f */
- 28, /* nei_f */
+ 40, /* nei_f */
16, /* unltr_f */
- 28, /* unlti_f */
+ 40, /* unlti_f */
16, /* unler_f */
- 28, /* unlei_f */
+ 40, /* unlei_f */
16, /* uneqr_f */
- 28, /* uneqi_f */
+ 40, /* uneqi_f */
16, /* unger_f */
- 28, /* ungei_f */
+ 40, /* ungei_f */
16, /* ungtr_f */
- 28, /* ungti_f */
+ 40, /* ungti_f */
16, /* ltgtr_f */
- 28, /* ltgti_f */
+ 40, /* ltgti_f */
16, /* ordr_f */
- 28, /* ordi_f */
+ 40, /* ordi_f */
16, /* unordr_f */
- 28, /* unordi_f */
+ 40, /* unordi_f */
8, /* truncr_f_i */
8, /* truncr_f_l */
8, /* extr_f */
4, /* extr_d_f */
4, /* movr_f */
- 12, /* movi_f */
+ 24, /* movi_f */
4, /* ldr_f */
- 12, /* ldi_f */
+ 24, /* ldi_f */
8, /* ldxr_f */
- 4, /* ldxi_f */
+ 16, /* ldxi_f */
4, /* str_f */
- 12, /* sti_f */
+ 24, /* sti_f */
8, /* stxr_f */
- 4, /* stxi_f */
+ 16, /* stxi_f */
12, /* bltr_f */
- 24, /* blti_f */
+ 36, /* blti_f */
12, /* bler_f */
- 24, /* blei_f */
+ 36, /* blei_f */
12, /* beqr_f */
- 24, /* beqi_f */
+ 36, /* beqi_f */
12, /* bger_f */
- 24, /* bgei_f */
+ 36, /* bgei_f */
12, /* bgtr_f */
- 24, /* bgti_f */
+ 36, /* bgti_f */
12, /* bner_f */
- 24, /* bnei_f */
+ 36, /* bnei_f */
12, /* bunltr_f */
- 24, /* bunlti_f */
+ 36, /* bunlti_f */
12, /* bunler_f */
- 24, /* bunlei_f */
+ 36, /* bunlei_f */
12, /* buneqr_f */
- 24, /* buneqi_f */
+ 36, /* buneqi_f */
12, /* bunger_f */
- 24, /* bungei_f */
+ 36, /* bungei_f */
12, /* bungtr_f */
- 24, /* bungti_f */
+ 36, /* bungti_f */
12, /* bltgtr_f */
- 24, /* bltgti_f */
+ 36, /* bltgti_f */
12, /* bordr_f */
- 24, /* bordi_f */
+ 36, /* bordi_f */
12, /* bunordr_f */
- 24, /* bunordi_f */
+ 36, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
0, /* putargr_d */
0, /* putargi_d */
4, /* addr_d */
- 16, /* addi_d */
+ 28, /* addi_d */
4, /* subr_d */
- 16, /* subi_d */
- 16, /* rsbi_d */
+ 28, /* subi_d */
+ 28, /* rsbi_d */
4, /* mulr_d */
- 16, /* muli_d */
+ 28, /* muli_d */
4, /* divr_d */
- 16, /* divi_d */
+ 28, /* divi_d */
4, /* negr_d */
4, /* absr_d */
4, /* sqrtr_d */
16, /* ltr_d */
- 28, /* lti_d */
+ 44, /* lti_d */
16, /* ler_d */
- 28, /* lei_d */
+ 44, /* lei_d */
16, /* eqr_d */
- 28, /* eqi_d */
+ 44, /* eqi_d */
16, /* ger_d */
- 28, /* gei_d */
+ 44, /* gei_d */
16, /* gtr_d */
- 28, /* gti_d */
+ 44, /* gti_d */
16, /* ner_d */
- 28, /* nei_d */
+ 44, /* nei_d */
16, /* unltr_d */
- 28, /* unlti_d */
+ 44, /* unlti_d */
16, /* unler_d */
- 28, /* unlei_d */
+ 44, /* unlei_d */
16, /* uneqr_d */
- 28, /* uneqi_d */
+ 44, /* uneqi_d */
16, /* unger_d */
- 28, /* ungei_d */
+ 44, /* ungei_d */
16, /* ungtr_d */
- 28, /* ungti_d */
+ 44, /* ungti_d */
16, /* ltgtr_d */
- 28, /* ltgti_d */
+ 44, /* ltgti_d */
16, /* ordr_d */
- 28, /* ordi_d */
+ 44, /* ordi_d */
16, /* unordr_d */
- 28, /* unordi_d */
+ 44, /* unordi_d */
8, /* truncr_d_i */
8, /* truncr_d_l */
8, /* extr_d */
4, /* extr_f_d */
4, /* movr_d */
- 12, /* movi_d */
+ 28, /* movi_d */
4, /* ldr_d */
- 12, /* ldi_d */
+ 24, /* ldi_d */
8, /* ldxr_d */
- 4, /* ldxi_d */
+ 16, /* ldxi_d */
4, /* str_d */
- 12, /* sti_d */
+ 24, /* sti_d */
8, /* stxr_d */
- 4, /* stxi_d */
+ 16, /* stxi_d */
12, /* bltr_d */
- 24, /* blti_d */
+ 36, /* blti_d */
12, /* bler_d */
- 24, /* blei_d */
+ 36, /* blei_d */
12, /* beqr_d */
- 24, /* beqi_d */
+ 36, /* beqi_d */
12, /* bger_d */
- 24, /* bgei_d */
+ 36, /* bgei_d */
12, /* bgtr_d */
- 24, /* bgti_d */
+ 36, /* bgti_d */
12, /* bner_d */
- 24, /* bnei_d */
+ 40, /* bnei_d */
12, /* bunltr_d */
- 24, /* bunlti_d */
+ 40, /* bunlti_d */
12, /* bunler_d */
- 24, /* bunlei_d */
+ 40, /* bunlei_d */
12, /* buneqr_d */
- 24, /* buneqi_d */
+ 40, /* buneqi_d */
12, /* bunger_d */
- 24, /* bungei_d */
+ 40, /* bungei_d */
12, /* bungtr_d */
- 24, /* bungti_d */
+ 40, /* bungti_d */
12, /* bltgtr_d */
- 24, /* bltgti_d */
+ 36, /* bltgti_d */
12, /* bordr_d */
- 24, /* bordi_d */
+ 36, /* bordi_d */
12, /* bunordr_d */
- 24, /* bunordi_d */
+ 40, /* bunordi_d */
0, /* pushargr_d */
0, /* pushargi_d */
0, /* retr_d */
0, /* movr_d_ww */
0, /* movi_d_ww */
4, /* movr_d_w */
- 12, /* movi_d_w */
- 20, /* bswapr_us */
- 52, /* bswapr_ui */
- 116, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
+ 24, /* movi_d_w */
+ 4, /* clo */
+ 4, /* clz */
+ 72, /* cto */
+ 72, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# include <sys/cachectl.h>
#endif
+#if NEW_ABI
+/* callee save + variadic arguments
+ * align16(ra+fp+s[0-7]+f20+f22+f24+f26+f28+f30) + align16(a[0-7]) */
+# define stack_framesize (128 + 64)
+#else
+/* callee save
+ * align16(ra+fp+s[0-7]+f16+f18+f20+f22+f24+f26+f28+f30) */
+# define stack_framesize 128
+#endif
+
#if NEW_ABI
# define NUM_WORD_ARGS 8
# define STACK_SLOT 8
/*
* Prototypes
*/
-#define jit_make_arg(node) _jit_make_arg(_jit,node)
-static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
#define jit_make_arg_f(node) _jit_make_arg_f(_jit,node)
static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
#define jit_make_arg_d(node) _jit_make_arg_d(_jit,node)
static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
+#define compute_framesize() _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
#define patch(instr, node) _patch(_jit, instr, node)
static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
# include "jit_rewind.c"
# include "jit_mips-cpu.c"
# include "jit_mips-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
* Initialization
*/
+jit_cpu_t jit_cpu;
jit_register_t _rvs[] = {
{ rc(gpr) | 0x01, "at" },
{ rc(gpr) | 0x02, "v0" },
{ _NOREG, "<none>" },
};
/* Integer registers whose presence in a function's regset adds one
 * STACK_SLOT to the frame size computed by _compute_framesize(). */
+static jit_int32_t iregs[] = {
+ _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7
+};
+
/* Floating-point registers whose presence in a function's regset adds
 * sizeof(jit_float64_t) to the frame size computed by
 * _compute_framesize(); _F16 and _F18 are listed only when !NEW_ABI. */
+static jit_int32_t fregs[] = {
+#if !NEW_ABI
+ _F16, _F18,
+#endif
+ _F20, _F22, _F24, _F26, _F28, _F30
+};
+
/*
* Implementation
*/
/*
 * Determine the MIPS ISA release and store it in jit_cpu.release.
 *
 * On Linux, /proc/cpuinfo is scanned first for its "isa" line. If
 * jit_cpu.release is still zero afterwards, it is taken from whichever
 * compile-time macro the preprocessor chain below selects
 * (__mips_isa_rev, else _MIPS_ARCH, else __mips when it is below 6).
 */
void
jit_get_cpu(void)
{
+#if defined(__linux__)
+ FILE *fp;
+ char *ptr;
+ char buf[128];
+
+ if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) { /* skipped silently if the file cannot be opened */
+ while (fgets(buf, sizeof(buf), fp)) {
+ if (strncmp(buf, "isa : ", 8) == 0) { /* line is identified by its first 8 bytes */
+ if ((ptr = strstr(buf + 9, "mips64r"))) /* only a "mips64r<N>" entry is recognized */
+ jit_cpu.release = strtoul(ptr + 7, NULL, 10); /* <N>, parsed as decimal */
+ break; /* stop at the first isa line, whether or not it matched */
+ }
+ }
+ fclose(fp);
+ }
+#endif
+#if __mips_isa_rev
+ if (!jit_cpu.release) /* keep a value already found above */
+ jit_cpu.release = __mips_isa_rev;
+#elif defined _MIPS_ARCH
+ if (!jit_cpu.release)
+ jit_cpu.release = strtoul(&_MIPS_ARCH[4], NULL, 10); /* digits from the fifth character on; presumably skips a "mips" prefix - confirm */
+#elif defined(__mips) && __mips < 6
+ if (!jit_cpu.release)
+ jit_cpu.release = __mips;
+#endif
}
void
_jit_allocai(jit_state_t *_jit, jit_int32_t length)
{
assert(_jitc->function);
+ jit_check_frame();
switch (length) {
case 0: case 1: break;
case 2: _jitc->function->self.aoff &= -2; break;
}
/*
 * Return the value held in register u from the function being built:
 * the value is moved to JIT_RET and jit_ret() is issued, bracketed by
 * jit_code_inc_synth_w()/jit_dec_synth(). The caller now supplies the
 * node code to record instead of the fixed retr code.
 */
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u); /* record the caller-supplied retr variant */
+ jit_movr(JIT_RET, u); /* now unconditional: the JIT_RET != u guard and jit_live() are dropped */
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
/*
 * Tell whether the argument described by node u is passed in a register.
 *
 * Integer argument nodes (codes jit_code_arg_c through jit_code_arg) are
 * decided by jit_arg_reg_p() on u->u.w. Any other node must be an arg_f
 * or arg_d node; for those the answer depends on the ABI, see below.
 */
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) /* any of the typed integer arg codes */
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
#if NEW_ABI
- return (jit_arg_reg_p(u->u.w));
+ return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8)); /* also accept indexes that pass the test after subtracting 8 */
#else
return (u->u.w < 8);
#endif
}
static jit_node_t *
-_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
{
jit_int32_t offset;
#if NEW_ABI
}
#else
offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
- _jitc->function->self.argi = 1;
+ ++_jitc->function->self.argi;
if (offset >= 4)
offset = _jitc->function->self.size;
_jitc->function->self.size += STACK_SLOT;
#endif
if (node == (jit_node_t *)0)
- node = jit_new_node(jit_code_arg);
+ node = jit_new_node(code);
else
link_node(node);
node->u.w = offset;
else {
assert(!(_jitc->function->self.call & jit_call_varargs));
#if NEW_ABI
- /* If varargs start in a register, allocate extra 64 bytes. */
if (jit_arg_reg_p(_jitc->function->self.argi))
rewind_prolog();
/* Do not set during possible rewind. */
_jitc->function->vagp = _jitc->function->self.argi;
}
jit_inc_synth(ellipsis);
+ jit_check_frame();
if (_jitc->prepare)
jit_link_prepare();
else
}
/*
 * Declare a new integer argument of kind `code` for the function being
 * built and return its node. A null node is handed to jit_make_arg(),
 * which then creates the node itself.
 */
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
assert(_jitc->function);
- return (jit_make_arg((jit_node_t*)0));
+ assert(!(_jitc->function->self.call & jit_call_varargs)); /* not allowed once the function is flagged varargs */
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg); /* must be one of the integer arg codes */
+#endif
+ return (jit_make_arg((jit_node_t*)0, code));
}
jit_node_t *
/*
 * Load integer argument v into register u using the _c (signed char)
 * operations. A register-passed argument (jit_arg_reg_p(v->u.w)) is
 * extended from _A0 - v->u.w; any other argument is loaded relative to
 * _FP at v->u.w + C_DISP.
 */
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, _A0 - v->u.w);
- else
- jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+ jit_link_alist(node); /* NOTE(review): presumably records the load so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
/*
 * Load integer argument v into register u using the _uc (unsigned char)
 * operations. A register-passed argument (jit_arg_reg_p(v->u.w)) is
 * extended from _A0 - v->u.w; any other argument is loaded relative to
 * _FP at v->u.w + C_DISP.
 */
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, _A0 - v->u.w);
- else
- jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+ jit_link_alist(node); /* NOTE(review): presumably records the load so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
/*
 * Load integer argument v into register u using the _s (signed short)
 * operations. A register-passed argument (jit_arg_reg_p(v->u.w)) is
 * extended from _A0 - v->u.w; any other argument is loaded relative to
 * _FP at v->u.w + S_DISP.
 */
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, _A0 - v->u.w);
- else
- jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+ jit_link_alist(node); /* NOTE(review): presumably records the load so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
/*
 * Load integer argument v into register u using the _us (unsigned short)
 * operations. A register-passed argument (jit_arg_reg_p(v->u.w)) is
 * extended from _A0 - v->u.w; any other argument is loaded relative to
 * _FP at v->u.w + S_DISP.
 */
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, _A0 - v->u.w);
- else
- jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+ jit_link_alist(node); /* NOTE(review): presumably records the load so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w)) {
#if __WORDSIZE == 64
jit_movr(u, _A0 - v->u.w);
#endif
}
- else
- jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+ else {
+ jit_node_t *node = jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
/*
 * Load integer argument v into register u using the _ui (unsigned int)
 * operations. A register-passed argument (jit_arg_reg_p(v->u.w)) is
 * extended from _A0 - v->u.w; any other argument is loaded relative to
 * _FP at v->u.w + I_DISP.
 */
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, _A0 - v->u.w);
- else
- jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+ jit_link_alist(node); /* NOTE(review): presumably records the load so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
/*
 * Load integer argument v into register u as a full word (_l forms).
 * A register-passed argument (jit_arg_reg_p(v->u.w)) is copied from
 * _A0 - v->u.w with jit_movr(); any other argument is loaded relative
 * to _FP at v->u.w, with no extra displacement.
 */
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, _A0 - v->u.w);
- else
- jit_ldxi_l(u, _FP, v->u.w);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_ldxi_l(u, _FP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably records the load so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
#endif
/*
 * Overwrite integer argument v with the value of register u.
 * A register-passed argument (jit_arg_reg_p(v->u.w)) receives the value
 * in _A0 - v->u.w; any other argument is stored relative to _FP at
 * v->u.w + WORD_ADJUST. The caller supplies the putargr variant in
 * `code`, which is checked against the argument node's own code.
 */
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- jit_inc_synth_wp(putargr, u, v);
- assert(v->code == jit_code_arg);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v); /* record the caller-supplied variant instead of the fixed putargr */
if (jit_arg_reg_p(v->u.w))
jit_movr(_A0 - v->u.w, u);
- else
- jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+ else { /* argument is not in a register */
+ jit_node_t *node = jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+ jit_link_alist(node); /* NOTE(review): presumably records the store so patch_alist() can adjust it later - confirm */
+ jit_check_frame(); /* NOTE(review): presumably flags that the function needs a frame - confirm */
+ }
jit_dec_synth();
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(_A0 - v->u.w, u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(v->u.w + WORD_ADJUST, _FP, regno);
+ node = jit_stxi(v->u.w + WORD_ADJUST, _FP, regno);
+ jit_link_alist(node);
+ jit_check_frame();
jit_unget_reg(regno);
}
jit_dec_synth();
if (jit_arg_reg_p(v->u.w))
jit_movr_f(u, _F12 - v->u.w);
else if (jit_arg_reg_p(v->u.w - 8))
- jit_movr_w_f(u, _A0 - v->u.w - 8);
+ jit_movr_w_f(u, _A0 - (v->u.w - 8));
#else
if (v->u.w < 4)
jit_movr_w_f(u, _A0 - v->u.w);
else if (v->u.w < 8)
jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1));
#endif
- else
- jit_ldxi_f(u, _FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_f(u, _FP, v->u.w);
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
if (jit_arg_reg_p(v->u.w))
jit_movr_f(_F12 - v->u.w, u);
else if (jit_arg_reg_p(v->u.w - 8))
- jit_movr_f_w(_A0 - v->u.w - 8, u);
+ jit_movr_f_w(_A0 - (v->u.w - 8), u);
#else
if (v->u.w < 4)
jit_movr_f_w(_A0 - v->u.w, u);
else if (v->u.w < 8)
jit_movr_f(_F12 - ((v->u.w - 4) >> 1), u);
#endif
- else
- jit_stxi_f(v->u.w, _FP, u);
+ else {
+ jit_node_t *node = jit_stxi_f(v->u.w, _FP, u);
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
#if NEW_ABI
if (jit_arg_reg_p(v->u.w))
jit_movi_f(_F12 - v->u.w, u);
- else if (jit_arg_reg_p(v->u.w - 8)) {
- regno = jit_get_reg(jit_class_fpr);
- jit_movi_f(regno, u);
- jit_movr_f_w(_A0 - v->u.w - 8, u);
- jit_unget_reg(regno);
- }
+ else if (jit_arg_reg_p(v->u.w - 8))
+ jit_movi_f_w(_A0 - (v->u.w - 8), u);
#else
if (v->u.w < 4) {
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(_F12 - ((v->u.w - 4) >> 1), u);
#endif
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(v->u.w, _FP, regno);
+ node = jit_stxi_f(v->u.w, _FP, regno);
+ jit_link_alist(node);
+ jit_check_frame();
jit_unget_reg(regno);
}
jit_dec_synth();
if (jit_arg_reg_p(v->u.w))
jit_movr_d(u, _F12 - v->u.w);
else if (jit_arg_reg_p(v->u.w - 8))
- jit_movr_d_w(_A0 - v->u.w - 8, u);
+ jit_movr_d_w(_A0 - (v->u.w - 8), u);
#else
if (v->u.w < 4)
jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1));
else if (v->u.w < 8)
jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1));
#endif
- else
- jit_ldxi_d(u, _FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_d(u, _FP, v->u.w);
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
if (jit_arg_reg_p(v->u.w))
jit_movr_d(_F12 - v->u.w, u);
else if (jit_arg_reg_p(v->u.w - 8))
- jit_movr_d_w(_A0 - v->u.w - 8, u);
+ jit_movr_d_w(_A0 - (v->u.w - 8), u);
#else
if (v->u.w < 4)
jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), u);
else if (v->u.w < 8)
jit_movr_d(_F12 - ((v->u.w - 4) >> 1), u);
#endif
- else
- jit_stxi_d(v->u.w, _FP, u);
+ else {
+ jit_node_t *node = jit_stxi_d(v->u.w, _FP, u);
+ jit_link_alist(node);
+ jit_check_frame();
+ }
jit_dec_synth();
}
#if NEW_ABI
if (jit_arg_reg_p(v->u.w))
jit_movi_d(_F12 - v->u.w, u);
- else if (jit_arg_reg_p(v->u.w - 8)) {
- regno = jit_get_reg(jit_class_fpr);
- jit_movi_d(regno, u);
- jit_movr_d_w(_A0 - v->u.w - 8, u);
- jit_unget_reg(regno);
- }
+ else if (jit_arg_reg_p(v->u.w - 8))
+ jit_movi_d_w(_A0 - (v->u.w - 8), u);
#else
if (v->u.w < 4) {
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(_F12 - ((v->u.w - 4) >> 1), u);
#endif
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(v->u.w, _FP, regno);
+ node = jit_stxi_d(v->u.w, _FP, regno);
+ jit_link_alist(node);
+ jit_check_frame();
jit_unget_reg(regno);
}
jit_dec_synth();
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
#if NEW_ABI
assert(_jitc->function);
++_jitc->function->call.argi;
}
else {
+ jit_check_frame();
jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, u);
_jitc->function->call.size += STACK_SLOT;
}
jit_word_t offset;
assert(_jitc->function);
offset = _jitc->function->call.size >> STACK_SHIFT;
- _jitc->function->call.argi = 1;
+ ++_jitc->function->call.argi;
if (jit_arg_reg_p(offset))
jit_movr(_A0 - offset, u);
- else
+ else {
+ jit_check_frame();
jit_stxi(_jitc->function->call.size, JIT_SP, u);
+ }
_jitc->function->call.size += STACK_SLOT;
#endif
jit_dec_synth();
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
#if !NEW_ABI
jit_word_t offset;
#endif
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
#if NEW_ABI
if (jit_arg_reg_p(_jitc->function->call.argi)) {
++_jitc->function->call.argi;
}
else {
+ jit_check_frame();
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, regno);
if (jit_arg_reg_p(offset))
jit_movi(_A0 - offset, u);
else {
+ jit_check_frame();
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
jit_stxi(_jitc->function->call.size, JIT_SP, regno);
++_jitc->function->call.argi;
}
else {
+ jit_check_frame();
jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += STACK_SLOT;
}
++_jitc->function->call.argi;
jit_movr_f_w(_A0 - offset, u);
}
- else
+ else {
+ jit_check_frame();
jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+ }
_jitc->function->call.size += STACK_SLOT;
#endif
jit_dec_synth();
++_jitc->function->call.argi;
}
else {
+ jit_check_frame();
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
jit_movi_f_w(_A0 - offset, u);
}
else {
+ jit_check_frame();
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
++_jitc->function->call.argi;
}
else {
+ jit_check_frame();
jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += STACK_SLOT;
}
++_jitc->function->call.argf;
}
}
- else
+ else {
+ jit_check_frame();
jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+ }
_jitc->function->call.size += sizeof(jit_float64_t);
#endif
jit_dec_synth();
++_jitc->function->call.argi;
}
else {
+ jit_check_frame();
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
}
}
else {
+ jit_check_frame();
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
{
jit_node_t *call;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishr, r0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
{
jit_node_t *call;
- jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishi, (jit_word_t)i0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
- node = jit_movi(_T9, (jit_word_t)i0);
- call = jit_callr(_T9);
+ call = jit_calli(i0);
call->v.w = _jitc->function->call.argi;
#if NEW_ABI
call->w.w = call->v.w;
_jitc->function->call.size = 0;
_jitc->prepare = 0;
jit_dec_synth();
- return (node);
+ return (call);
}
void
jit_word_t word;
jit_int32_t value;
jit_int32_t offset;
+
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
prevw = _jit->pc.w;
#endif
value = jit_classify(node->code);
+#if GET_JIT_SIZE
+ flush();
+#endif
jit_regarg_set(node, value);
switch (node->code) {
case jit_code_align:
/* Must align to a power of two */
assert(!(node->u.w & (node->u.w - 1)));
+ flush();
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
+ flush();
+ break;
+ case jit_code_skip:
+ flush();
+ nop((node->u.w + 3) & ~3);
+ flush();
break;
case jit_code_note: case jit_code_name:
+ flush();
node->u.w = _jit->pc.w;
break;
case jit_code_label:
+ flush();
/* remember label is defined */
node->flag |= jit_flag_patch;
node->u.w = _jit->pc.w;
break;
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(lt,);
case_rrw(lt,);
case_rrr(lt, _u);
case_brr(bunord, _d);
case_brf(bunord, _d, 64);
case jit_code_jmpr:
+ jit_check_frame();
jmpr(rn(node->u.w));
break;
case jit_code_jmpi:
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
if (temp->flag & jit_flag_patch)
- jmpi(temp->u.w);
+ jmpi(temp->u.w, 0);
else {
- word = jmpi(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (jit_mips2_p() && can_relative_jump_p(word))
+ word = jmpi(_jit->pc.w, 1);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
- else
- jmpi(node->u.w);
+ else {
+ jit_check_frame();
+ jmpi(node->u.w, 0);
+ }
break;
case jit_code_callr:
+ jit_check_frame();
callr(rn(node->u.w));
break;
case jit_code_calli:
temp = node->u.n;
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
- word = calli_p(temp->u.w);
- if (!(temp->flag & jit_flag_patch))
+ if (temp->flag & jit_flag_patch)
+ calli(temp->u.w, 0);
+ else {
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (jit_mips2_p() && can_relative_jump_p(word))
+ word = calli(_jit->pc.w, 1);
+ else
+ word = calli_p(_jit->pc.w);
patch(word, node);
+ }
+ }
+ else {
+ jit_check_frame();
+ calli(node->u.w, 0);
}
- else
- calli(node->u.w);
break;
case jit_code_prolog:
+ flush();
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
undo.patch_offset = _jitc->patches.offset;
restart_function:
_jitc->again = 0;
+ compute_framesize();
+ patch_alist(0);
prolog(node);
break;
case jit_code_epilog:
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ undo.func.need_frame = _jitc->function->need_frame;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ /* this will be recomputed but undo anyway to have it
+ * better self documented.*/
+ undo.func.need_stack = _jitc->function->need_stack;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
_jitc->patches.offset = undo.patch_offset;
+ patch_alist(1);
goto restart_function;
}
/* remember label is defined */
+ flush();
node->flag |= jit_flag_patch;
node->u.w = _jit->pc.w;
epilog(node);
case jit_code_va_arg_d:
vaarg_d(rn(node->u.w), rn(node->v.w));
break;
- case jit_code_live:
- case jit_code_arg: case jit_code_ellipsis:
+ case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+# if __WORDSIZE == 64
+ case jit_code_arg_l:
+# endif
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+#if __WORDSIZE == 64
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
+#endif
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_ui: case jit_code_getarg_l:
#endif
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+#if __WORDSIZE == 64
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
+#endif
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
+#endif
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
break;
}
}
+#if GET_JIT_SIZE
+ flush();
+#endif
jit_regarg_clr(node, value);
assert(_jitc->regarg == 0 ||
(jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
/* update register live state */
jit_reglive(node);
}
+ flush();
#undef case_brf
#undef case_brw
#undef case_brr
# include "jit_rewind.c"
# include "jit_mips-cpu.c"
# include "jit_mips-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
stxi_d(i0, rn(r0), rn(r1));
}
/*
 * Compute _jitc->framesize for the current function (_jitc->function).
 *
 * The size starts at two stack slots, grows by one STACK_SLOT for each
 * register of iregs[] and by sizeof(jit_float64_t) for each register of
 * fregs[] that is set in the function's regset, gains room for variadic
 * arguments under NEW_ABI, and is finally rounded up to a multiple of 16.
 */
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+ jit_int32_t reg; /* index into iregs[] and then fregs[] */
+ _jitc->framesize = STACK_SLOT << 1; /* ra+fp */
+ for (reg = 0; reg < jit_size(iregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) /* only registers present in the regset take space */
+ _jitc->framesize += STACK_SLOT;
+
+ for (reg = 0; reg < jit_size(fregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+ _jitc->framesize += sizeof(jit_float64_t);
+
+#if NEW_ABI
+ /* Space to store variadic arguments */
+ if (_jitc->function->self.call & jit_call_varargs)
+ _jitc->framesize += (NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT; /* one slot per word argument index from vagp up to NUM_WORD_ARGS */
+#endif
+
+ /* Make sure functions called have a 16 byte aligned stack */
+ _jitc->framesize = (_jitc->framesize + 15) & -16; /* round up to the next multiple of 16 */
+}
+
static void
_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
{
/*
- * Copyright (C) 2014-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
"data",
"live", "align",
"save", "load",
+ "skip",
"#name", "#note",
"label",
"prolog",
"ellipsis", "va_push",
"allocai", "allocar",
- "arg",
+ "arg_c",
+ "arg_s",
+ "arg_i",
+ "arg_l",
"getarg_c", "getarg_uc",
"getarg_s", "getarg_us",
"getarg_i", "getarg_ui",
"getarg_l",
- "putargr", "putargi",
+ "putargr_c", "putargi_c",
+ "putargr_uc", "putargi_uc",
+ "putargr_s", "putargi_s",
+ "putargr_us", "putargi_us",
+ "putargr_i", "putargi_i",
+ "putargr_ui", "putargi_ui",
+ "putargr_l", "putargi_l",
"va_start",
"va_arg", "va_arg_d",
"va_end",
"ner", "nei",
"movr", "movi",
"movnr", "movzr",
+ "casr", "casi",
"extr_c", "extr_uc",
"extr_s", "extr_us",
"extr_i", "extr_ui",
+ "bswapr_us",
+ "bswapr_ui", "bswapr_ul",
"htonr_us",
"htonr_ui", "htonr_ul",
"ldr_c", "ldi_c",
"jmpr", "jmpi",
"callr", "calli",
"prepare",
- "pushargr", "pushargi",
+ "pushargr_c", "pushargi_c",
+ "pushargr_uc", "pushargi_uc",
+ "pushargr_s", "pushargi_s",
+ "pushargr_us", "pushargi_us",
+ "pushargr_i", "pushargi_i",
+ "pushargr_ui", "pushargi_ui",
+ "pushargr_l", "pushargi_l",
"finishr", "finishi",
"ret",
- "retr", "reti",
+ "retr_c", "reti_c",
+ "retr_uc", "reti_uc",
+ "retr_s", "reti_s",
+ "retr_us", "reti_us",
+ "retr_i", "reti_i",
+ "retr_ui", "reti_ui",
+ "retr_l", "reti_l",
"retval_c", "retval_uc",
"retval_s", "retval_us",
"retval_i", "retval_ui",
"movr_f_w", "movi_f_w",
"movr_d_ww", "movi_d_ww",
"movr_d_w", "movi_d_w",
- "bswapr_us",
- "bswapr_ui", "bswapr_ul",
- "casr", "casi",
+ "clo", "clz",
+ "cto", "ctz",
};
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define XCMPLI(cr,l,a,u) FCI(10,cr,l,a,u)
# define CMPLDI(a,s) XCMPLI(0,1,a,s)
# define CMPLWI(a,s) XCMPLI(0,0,a,s)
+# if __WORDSIZE == 32
+# define CMPX(a,b) CMPW(a,b)
+# define CMPXI(a,s) CMPWI(a,s)
+# define CMPLX(a,b) CMPLW(a,b)
+# define CMPLXI(a,s) CMPLWI(a,s)
+# else
+# define CMPX(a,b) CMPD(a,b)
+# define CMPXI(a,s) CMPDI(a,s)
+# define CMPLX(a,b) CMPLD(a,b)
+# define CMPLXI(a,s) CMPLDI(a,s)
+# endif
# define CNTLZW(a,s) FX(31,s,a,0,26)
# define CNTLZW_(a,s) FX_(31,s,a,0,26)
+# define CNTLZD(a,s) FX(31,s,a,0,58)
+# define CNTLZD_(a,s) FX_(31,s,a,0,58)
# define CRAND(d,a,b) FX(19,d,a,b,257)
# define CRANDC(d,a,b) FX(19,d,a,b,129)
# define CREQV(d,a,b) FX(19,d,a,b,289)
#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define negr(r0,r1) NEG(r0,r1)
# define comr(r0,r1) NOT(r0,r1)
+# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# if __WORDSIZE == 32
+# define clzr(r0, r1) CNTLZW(r0, r1)
+# else
+# define clzr(r0, r1) CNTLZD(r0, r1)
+# endif
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define extr_c(r0,r1) EXTSB(r0,r1)
# define extr_uc(r0,r1) ANDI_(r0,r1,0xff)
# define extr_s(r0,r1) EXTSH(r0,r1)
# define callr(r0,i0) _callr(_jit,r0,i0)
static void _callr(jit_state_t*,jit_int32_t,jit_int32_t);
# define calli(i0,i1) _calli(_jit,i0,i1)
-static void _calli(jit_state_t*,jit_word_t,jit_int32_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t);
# define calli_p(i0,i1) _calli_p(_jit,i0,i1)
static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t);
# else
# define callr(r0) _callr(_jit,r0)
static void _callr(jit_state_t*,jit_int32_t);
# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
# define calli_p(i0) _calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
#endif
static void
_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{ /* r0 = r1 when r2 is not zero; otherwise r0 is left untouched */
- CMPWI(r2, 0);
+ CMPXI(r2, 0); /* word-size compare: CMPWI on 32 bit, CMPDI on 64 bit */
BEQ(8); /* r2 == 0: branch with displacement 8, past the following MR */
MR(r0, r1); /* only reached when r2 != 0 */
}
static void
_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{ /* r0 = r1 when r2 is zero; otherwise r0 is left untouched */
- CMPWI(r2, 0);
+ CMPXI(r2, 0); /* word-size compare: CMPWI on 32 bit, CMPDI on 64 bit */
BNE(8); /* r2 != 0: branch with displacement 8, past the following MR */
MR(r0, r1); /* only reached when r2 == 0 */
}
jit_unget_reg(r1_reg);
}
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int v; // 32-bit word to reverse bit order
+
+// swap odd and even bits
+v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+// swap consecutive pairs
+v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+// swap nibbles ...
+v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+// swap bytes
+v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+// swap 2-byte long pairs
+v = ( v >> 16 ) | ( v << 16);
+ */
+/* Emit code that stores in r0 the value of r1 with its bit order
+ * reversed (bit 0 becomes the most significant bit and so on).
+ *
+ * The value is first copied to r0, then r0 is updated in place by a
+ * sequence of parallel swaps of growing width: single bits, bit pairs,
+ * nibbles, bytes, 16 bit halves and, on 64 bit, 32 bit halves.
+ * Three temporary registers are requested with jit_get_reg() and
+ * released before returning:
+ *	t0	mask for the current swap step
+ *	t1	right shifted and masked half of the value
+ *	t2	masked and left shifted half of the value
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_int32_t t0, t1, t2;
+ movr(r0, r1);
+ t0 = jit_get_reg(jit_class_gpr);
+ t1 = jit_get_reg(jit_class_gpr);
+ t2 = jit_get_reg(jit_class_gpr);
+ /* swap odd and even bits */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+ rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ /* swap consecutive pairs */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+ rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ /* swap nibbles */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+ rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ /* swap bytes */
+ movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+ rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# if __WORDSIZE == 32
+ /* swap the two 16 bit halves; no mask is needed for the last step */
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ lshi(rn(t2), r0, 16); /* t2 = v << 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# else
+ /* swap 16 bit halves inside each 32 bit word */
+ movi(rn(t0), 0x0000ffff0000ffffL);
+ rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+ andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+ andr(rn(t2), r0, rn(t0)); /* t2 = v & t0 */
+ lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+ /* swap the two 32 bit halves; no mask is needed for the last step */
+ rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
+ lshi(rn(t2), r0, 32); /* t2 = v << 32 */
+ orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# endif
+ jit_unget_reg(t2);
+ jit_unget_reg(t1);
+ jit_unget_reg(t0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* r0 = number of leading one bits of r1 */
+ comr(r0, r1); /* r0 = ~r1, so leading ones become leading zeros */
+ clzr(r0, r0); /* count the leading zeros of the complement */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* r0 = number of trailing one bits of r1 */
+ bitswap(r0, r1); /* r0 = bit reversed r1: trailing bits become leading */
+ clor(r0, r0); /* count the leading ones of the reversed value */
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{ /* r0 = number of trailing zero bits of r1 */
+ bitswap(r0, r1); /* r0 = bit reversed r1: trailing bits become leading */
+ clzr(r0, r0); /* count the leading zeros of the reversed value */
+}
+
static void
_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
{
static void
_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{ /* r0 = result of the "r1 less than r2" comparison */
- CMPW(r1, r2);
+ CMPX(r1, r2); /* word-size compare: CMPW on 32 bit, CMPD on 64 bit */
MFCR(r0); /* copy the condition register to r0 */
EXTRWI(r0, r0, 1, CR_LT); /* keep only the 1 bit field at CR_LT */
}
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
MFCR(r0);
static void
_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
CRNOT(CR_GT, CR_GT);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_GT);
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
CRNOT(CR_GT, CR_GT);
static void
_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{ /* r0 = result of the "r1 equal to r2" comparison */
- CMPW(r1, r2);
+ CMPX(r1, r2); /* word-size compare: CMPW on 32 bit, CMPD on 64 bit */
MFCR(r0); /* copy the condition register to r0 */
EXTRWI(r0, r0, 1, CR_EQ); /* keep only the 1 bit field at CR_EQ */
}
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else if (can_zero_extend_short_p(i0))
CMPLWI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
MFCR(r0);
static void
_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
CRNOT(CR_LT, CR_LT);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_LT);
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
CRNOT(CR_LT, CR_LT);
static void
_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{ /* r0 = result of the "r1 greater than r2" comparison */
- CMPW(r1, r2);
+ CMPX(r1, r2); /* word-size compare: CMPW on 32 bit, CMPD on 64 bit */
MFCR(r0); /* copy the condition register to r0 */
EXTRWI(r0, r0, 1, CR_GT); /* keep only the 1 bit field at CR_GT */
}
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
MFCR(r0);
static void
_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
- CMPW(r1, r2);
+ CMPX(r1, r2);
CRNOT(CR_EQ, CR_EQ);
MFCR(r0);
EXTRWI(r0, r0, 1, CR_EQ);
{
jit_int32_t reg;
if (can_sign_extend_short_p(i0))
- CMPWI(r1, i0);
+ CMPXI(r1, i0);
else if (can_zero_extend_short_p(i0))
CMPLWI(r1, i0);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
- CMPW(r1, rn(reg));
+ CMPX(r1, rn(reg));
jit_unget_reg(reg);
}
CRNOT(CR_EQ, CR_EQ);
_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BLT(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BLE(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BEQ(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else if (can_zero_extend_short_p(i1))
CMPLWI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BGE(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BGT(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
jit_word_t d, w;
- CMPW(r0, r1);
+ CMPX(r0, r1);
w = _jit->pc.w;
d = (i0 - w) & ~3;
BNE(d);
jit_int32_t reg;
jit_word_t d, w;
if (can_sign_extend_short_p(i1))
- CMPWI(r0, i1);
+ CMPXI(r0, i1);
else if (can_zero_extend_short_p(i1))
CMPLWI(r0, i1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i1);
- CMPW(r0, rn(reg));
+ CMPX(r0, rn(reg));
jit_unget_reg(reg);
}
w = _jit->pc.w;
jit_int32_t reg;
if (r1 == _R0_REGNO) {
if (r2 != _R0_REGNO)
- LWZX(r0, r2, r1);
+ LWAX(r0, r2, r1);
else {
reg = jit_get_reg(jit_class_gpr);
movr(rn(reg), r1);
}
}
else
- LWZX(r0, r1, r2);
+ LWAX(r0, r1, r2);
}
static void
}
/* assume fixed address or reachable address */
-static void
+static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0
# if _CALL_SYSV
, jit_int32_t varargs
# endif
)
{
+ jit_word_t w;
# if _CALL_SYSV
jit_word_t d;
d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
if (can_sign_extend_jump_p(d)) {
- /* Tell double arguments were passed in registers. */
- if (varargs)
- CREQV(6, 6, 6);
- BL(d);
- } else
+ /* Tell double arguments were passed in registers. */
+ if (varargs)
+ CREQV(6, 6, 6);
+ w = _jit->pc.w;
+ BL(d);
+ }
+ else
# endif
{
+ w = _jit->pc.w;
movi(_R12_REGNO, i0);
callr(_R12_REGNO
# if _CALL_SYSV
# endif
);
}
+ return (w);
}
/* absolute jump */
if (!can_sign_extend_short_p(d)) {
/* use absolute address */
assert(can_sign_extend_short_p(label));
- d |= 2;
+ d = label | 2;
}
u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
break;
if (!can_sign_extend_jump_p(d)) {
/* use absolute address */
assert(can_sign_extend_jump_p(label));
- d |= 2;
+ d = label | 2;
}
- u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe);
+ u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd);
break;
case 15: /* LI */
#if __WORDSIZE == 32
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if __WORDSIZE == 32
#if defined(__powerpc__)
#if __BYTE_ORDER == __BIG_ENDIAN
-#if _CALL_SYSV
-#define JIT_INSTR_MAX 124
+#if !_CALL_SYSV
+#define JIT_INSTR_MAX 136
0, /* data */
0, /* live */
- 0, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
- 124, /* prolog */
+ 136, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
- 36, /* va_start */
- 52, /* va_arg */
- 64, /* va_arg_d */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
+ 4, /* va_start */
+ 8, /* va_arg */
+ 8, /* va_arg_d */
0, /* va_end */
4, /* addr */
12, /* addi */
8, /* movi */
12, /* movnr */
12, /* movzr */
+ 36, /* casr */
+ 44, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 8, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
4, /* htonr_us */
4, /* htonr_ui */
0, /* htonr_ul */
16, /* bxsubi_u */
8, /* jmpr */
4, /* jmpi */
- 12, /* callr */
- 20, /* calli */
+ 28, /* callr */
+ 36, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
36, /* extr_d */
4, /* extr_f_d */
4, /* movr_d */
- 24, /* movi_d */
+ 28, /* movi_d */
4, /* ldr_d */
8, /* ldi_d */
4, /* ldxr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 8, /* bswapr_us */
- 16, /* bswapr_ui */
- 0, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
-#endif /* _CALL_SYSV */
+ 8, /* clo */
+ 4, /* clz */
+ 136, /* cto */
+ 132, /* ctz */
+#endif /* !_CALL_SYSV */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
#if __WORDSIZE == 32
#if defined(__powerpc__)
#if __BYTE_ORDER == __BIG_ENDIAN
-#if !_CALL_SYSV
+#if _CALL_SYSV
#define JIT_INSTR_MAX 136
0, /* data */
0, /* live */
- 0, /* align */
+ 28, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
- 136, /* prolog */
+ 124, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
- 4, /* va_start */
- 8, /* va_arg */
- 8, /* va_arg_d */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
+ 36, /* va_start */
+ 52, /* va_arg */
+ 64, /* va_arg_d */
0, /* va_end */
4, /* addr */
12, /* addi */
12, /* remr_u */
20, /* remi_u */
4, /* andr */
- 12, /* andi */
+ 4, /* andi */
4, /* orr */
12, /* ori */
4, /* xorr */
16, /* nei */
4, /* movr */
8, /* movi */
- 12, /* movnr */
- 12, /* movzr */
+ 12, /* movnr */
+ 12, /* movzr */
+ 36, /* casr */
+ 44, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 8, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 0, /* bswapr_ul */
4, /* htonr_us */
4, /* htonr_ui */
0, /* htonr_ul */
16, /* bxsubi_u */
8, /* jmpr */
4, /* jmpi */
- 28, /* callr */
- 40, /* calli */
+ 12, /* callr */
+ 20, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
24, /* unordi_f */
12, /* truncr_f_i */
0, /* truncr_f_l */
- 20, /* extr_f */
+ 36, /* extr_f */
4, /* extr_d_f */
4, /* movr_f */
12, /* movi_f */
32, /* unordi_d */
12, /* truncr_d_i */
0, /* truncr_d_l */
- 20, /* extr_d */
+ 36, /* extr_d */
4, /* extr_f_d */
4, /* movr_d */
- 24, /* movi_d */
+ 28, /* movi_d */
4, /* ldr_d */
8, /* ldi_d */
4, /* ldxr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 20, /* bswapr_us */
- 16, /* bswapr_ui */
- 0, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
-#endif /* _CALL_AIX */
-#endif /* __BYTEORDER */
+ 8, /* clo */
+ 4, /* clz */
+ 136, /* cto */
+ 132, /* ctz */
+#endif /* _CALL_SYSV */
+#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
#if defined(__powerpc__)
#if __BYTE_ORDER == __BIG_ENDIAN
-#define JIT_INSTR_MAX 148
+#define JIT_INSTR_MAX 236
0, /* data */
0, /* live */
- 4, /* align */
+ 28, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
8, /* va_arg_d */
36, /* movi */
12, /* movnr */
12, /* movzr */
+ 36, /* casr */
+ 44, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
4, /* extr_i */
4, /* extr_ui */
+ 8, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 44, /* bswapr_ul */
4, /* htonr_us */
4, /* htonr_ui */
4, /* htonr_ul */
28, /* callr */
52, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 8, /* bswapr_us */
- 16, /* bswapr_ui */
- 44, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
+ 8, /* clo */
+ 4, /* clz */
+ 236, /* cto */
+ 232, /* ctz */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
#if defined(__powerpc__)
#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define JIT_INSTR_MAX 124
+#define JIT_INSTR_MAX 236
0, /* data */
0, /* live */
- 4, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
8, /* va_arg_d */
36, /* movi */
12, /* movnr */
12, /* movzr */
+ 36, /* casr */
+ 44, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
4, /* extr_i */
4, /* extr_ui */
+ 8, /* bswapr_us */
+ 16, /* bswapr_ui */
+ 44, /* bswapr_ul */
8, /* htonr_us */
16, /* htonr_ui */
44, /* htonr_ul */
12, /* callr */
32, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 8, /* bswapr_us */
- 16, /* bswapr_ui */
- 44, /* bswapr_ul */
- 36, /* casr */
- 44, /* casi */
+ 8, /* clo */
+ 4, /* clz */
+ 236, /* cto */
+ 232, /* ctz */
#endif /* __BYTE_ORDER */
#endif /* __powerpc__ */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) /* return the value held in register u */
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u); /* presumably opens a synthesized node tagged with the caller supplied retr_* code */
+ jit_movr(JIT_RET, u); /* now unconditional: the JIT_RET != u check was dropped */
jit_ret(); /* emit the actual return */
jit_dec_synth(); /* presumably closes the synthesized node opened above */
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{ /* result of the register predicate matching the class of argument node u */
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) /* any of the typed integer arg codes */
return (jit_arg_reg_p(u->u.w)); /* integer argument: test its offset u->u.w */
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); /* otherwise only float or double arguments are valid */
return (jit_arg_f_reg_p(u->u.w)); /* float/double argument: test its offset u->u.w */
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
jit_bool_t incr = 1;
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi)) {
offset = _jitc->function->self.argi++;
#if _CALL_SYSV
offset = _jitc->function->self.size;
if (incr)
_jitc->function->self.size += sizeof(jit_word_t);
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, JIT_RA0 - v->u.w);
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, JIT_RA0 - v->u.w);
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, JIT_RA0 - v->u.w);
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, JIT_RA0 - v->u.w);
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w)) {
#if __WORDSIZE == 32
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, JIT_RA0 - v->u.w);
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, JIT_RA0 - v->u.w);
#endif
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(JIT_RA0 - v->u.w, u);
else
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- jit_inc_synth_wp(putargi, u, v);
- assert(v->code == jit_code_arg);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(JIT_RA0 - v->u.w, u);
else {
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
jit_bool_t incr = 1;
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
jit_bool_t incr = 1;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
# endif
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case jit_code_casr:
casr(rn(node->u.w), rn(node->v.w),
rn(node->w.q.l), rn(node->w.q.h));
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (can_sign_extend_jump_p(word))
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
jmpi(node->u.w);
break;
case jit_code_callr:
- callr(rn(node->u.w)
#if _CALL_SYSV
- , !!(node->flag & jit_flag_varargs)
+# define xcallr(u, v) callr(u, v)
+# define xcalli_p(u, v) calli_p(u, v)
+# define xcalli(u, v) calli(u, v)
+#else
+# define xcallr(u, v) callr(u)
+# define xcalli_p(u, v) calli_p(u)
+# define xcalli(u, v) calli(u)
#endif
- );
+ xcallr(rn(node->u.w), !!(node->flag & jit_flag_varargs));
break;
case jit_code_calli:
+ value = !!(node->flag & jit_flag_varargs);
if (node->flag & jit_flag_node) {
temp = node->u.n;
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
- word = calli_p(temp->u.w
+ if (temp->flag & jit_flag_patch)
+ xcalli(temp->u.w, value);
+ else {
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
#if _CALL_SYSV
- , !!(node->flag & jit_flag_varargs)
+ if (can_sign_extend_jump_p(word + value * 4))
+ word = xcalli(_jit->pc.w, value);
+ else
#endif
- );
- if (!(temp->flag & jit_flag_patch))
+ word = xcalli_p(_jit->pc.w, value);
patch(word, node);
+ }
}
else
- calli(node->u.w
-#if _CALL_SYSV
- , !!(node->flag & jit_flag_varargs)
-#endif
- );
+ xcalli(node->u.w, value);
break;
case jit_code_prolog:
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
case jit_code_va_arg_d:
vaarg_d(rn(node->u.w), rn(node->v.w));
break;
- case jit_code_live:
- case jit_code_arg: case jit_code_ellipsis:
+ case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+# if __WORDSIZE == 64
+ case jit_code_arg_l:
+# endif
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+#if __WORDSIZE == 64
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
+#endif
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_ui: case jit_code_getarg_l:
#endif
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+#if __WORDSIZE == 64
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
+#endif
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
+#endif
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if __WORDSIZE == 32
# define MININT 0x80000000
+# define DEC_FMT "%d"
+# define HEX_FMT "0x%x"
#else
# define MININT 0x8000000000000000
+# define DEC_FMT "%ld"
+# define HEX_FMT "0x%lx"
#endif
#define print_hex(value) \
do { \
if (value < 0 && value != MININT) \
- fprintf(print_stream, "-0x%lx", -value); \
+ fprintf(print_stream, "-" HEX_FMT, (jit_uword_t)-value); \
else \
- fprintf(print_stream, "0x%lx", value); \
+ fprintf(print_stream, HEX_FMT, (jit_uword_t)value); \
} while (0)
-#define print_dec(value) fprintf(print_stream, "%ld", value)
+#define print_dec(value) fprintf(print_stream, DEC_FMT, value)
#define print_flt(value) fprintf(print_stream, "%g", value)
#define print_str(value) fprintf(print_stream, "%s", value)
#define print_ptr(value) fprintf(print_stream, "%p", value)
/*
- * Copyright (C) 2015-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2015-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
_jitc->function->self.size = stack_framesize;
#if __arm__
assert(jit_cpu.abi);
- _jitc->function->self.size += 64;
-#endif
-#if __mips__ && NEW_ABI
- /* Only add extra stack space if there are varargs
- * arguments in registers. */
- assert(jit_arg_reg_p(_jitc->function->self.argi));
- _jitc->function->self.size += 64;
+ _jitc->function->alist = NULL;
+#elif __mips__
+ _jitc->function->alist = NULL;
#endif
_jitc->function->self.argi =
_jitc->function->self.argf = _jitc->function->self.argn = 0;
for (; node; node = next) {
next = node->next;
switch (node->code) {
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i: case jit_code_arg_l:
node->next = (jit_node_t *)0;
- jit_make_arg(node);
+ jit_make_arg(node, node->code);
break;
case jit_code_arg_f:
node->next = (jit_node_t *)0;
/*
- * Copyright (C) 2019-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# undef ui
} instr_t;
# define ii(i) *_jit->pc.ui++ = i
-/* FIXME could jit_rewind_prolog() to only use extra 64 bytes
- * if a variadic jit function that have variadic arguments in
- * registers */
-# define stack_framesize (200 + 64)
# define ldr(r0, r1) ldr_l(r0, r1)
# define ldi(r0, im) ldi_l(r0, im)
# define ldxr(r0, r1, r2) ldxr_l(r0, r1, r2)
static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
# define jmpr(r0) JALR(_ZERO_REGNO, r0, 0)
# define jmpi(im) _jmpi(_jit, im)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
# define jmpi_p(im) _jmpi_p(_jit, im)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
# define callr(r0) JALR(_RA_REGNO, r0, 0)
# define calli(im) _calli(_jit, im)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
# define calli_p(im) _calli_p(_jit, im)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
# define prolog(i0) _prolog(_jit,i0)
return (w);
}
-static void
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
jit_int32_t t0;
- jit_word_t dsp;
- dsp = i0 - _jit->pc.w;
+ jit_word_t dsp, w;
+ w = _jit->pc.w;
+ dsp = i0 - w;
if (simm20_p(dsp))
JAL(_ZERO_REGNO, dsp);
else {
jmpr(rn(t0));
jit_unget_reg(t0);
}
+ return (w);
}
static jit_word_t
return (w);
}
-static void
+static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
jit_int32_t t0;
- jit_word_t dsp;
- dsp = i0 - _jit->pc.w;
+ jit_word_t dsp, w;
+ w = _jit->pc.w;
+ dsp = i0 - w;
if (simm20_p(dsp))
JAL(_RA_REGNO, dsp);
else {
callr(rn(t0));
jit_unget_reg(t0);
}
+ return (w);
}
static jit_word_t
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
_jitc->function->stack = ((_jitc->function->self.alen -
/* align stack at 16 bytes */
_jitc->function->self.aoff) + 15) & -16;
- subi(_SP_REGNO, _SP_REGNO, stack_framesize);
- stxi(0, _SP_REGNO, _RA_REGNO);
- stxi(8, _SP_REGNO, _FP_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _S1))
- stxi(16, _SP_REGNO, 9);
- if (jit_regset_tstbit(&_jitc->function->regset, _S2))
- stxi(24, _SP_REGNO, 18);
- if (jit_regset_tstbit(&_jitc->function->regset, _S3))
- stxi(32, _SP_REGNO, 19);
- if (jit_regset_tstbit(&_jitc->function->regset, _S4))
- stxi(40, _SP_REGNO, 20);
- if (jit_regset_tstbit(&_jitc->function->regset, _S5))
- stxi(48, _SP_REGNO, 21);
- if (jit_regset_tstbit(&_jitc->function->regset, _S6))
- stxi(56, _SP_REGNO, 22);
- if (jit_regset_tstbit(&_jitc->function->regset, _S7))
- stxi(64, _SP_REGNO, 23);
- if (jit_regset_tstbit(&_jitc->function->regset, _S8))
- stxi(72, _SP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _S9))
- stxi(80, _SP_REGNO, 25);
- if (jit_regset_tstbit(&_jitc->function->regset, _S10))
- stxi(88, _SP_REGNO, 26);
- if (jit_regset_tstbit(&_jitc->function->regset, _S11))
- stxi(96, _SP_REGNO, 27);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
- stxi_d(104, _SP_REGNO, 8);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
- stxi_d(112, _SP_REGNO, 9);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
- stxi_d(120, _SP_REGNO, 18);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
- stxi_d(128, _SP_REGNO, 19);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
- stxi_d(136, _SP_REGNO, 20);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
- stxi_d(144, _SP_REGNO, 21);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
- stxi_d(152, _SP_REGNO, 22);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
- stxi_d(160, _SP_REGNO, 23);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
- stxi_d(168, _SP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
- stxi_d(176, _SP_REGNO, 25);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
- stxi_d(184, _SP_REGNO, 26);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
- stxi_d(192, _SP_REGNO, 27);
- movr(_FP_REGNO, _SP_REGNO);
+
+ if (_jitc->function->stack)
+ _jitc->function->need_stack = 1;
+ if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ _jitc->function->need_stack = 1;
+ break;
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+ if (_jitc->function->need_frame) {
+ stxi(0, _SP_REGNO, _RA_REGNO);
+ stxi(8, _SP_REGNO, _FP_REGNO);
+ }
+ /* callee save registers */
+ for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ stxi(offs, _SP_REGNO, rn(iregs[reg]));
+ offs += sizeof(jit_word_t);
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame)
+ movr(_FP_REGNO, _SP_REGNO);
if (_jitc->function->stack)
subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
}
if (_jitc->function->self.call & jit_call_varargs) {
for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
- stxi(stack_framesize - ((8 - reg) * 8),
+ stxi(jit_framesize() - ((8 - reg) * 8),
_FP_REGNO, rn(JIT_RA0 - reg));
}
}
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, offs;
if (_jitc->function->assume_frame)
return;
- movr(_SP_REGNO, _FP_REGNO);
- ldxi(_RA_REGNO, _SP_REGNO, 0);
- ldxi(_FP_REGNO, _SP_REGNO, 8);
- if (jit_regset_tstbit(&_jitc->function->regset, _S1))
- ldxi(9, _SP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _S2))
- ldxi(18, _SP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _S3))
- ldxi(19, _SP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _S4))
- ldxi(20, _SP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _S5))
- ldxi(21, _SP_REGNO, 48);
- if (jit_regset_tstbit(&_jitc->function->regset, _S6))
- ldxi(22, _SP_REGNO, 56);
- if (jit_regset_tstbit(&_jitc->function->regset, _S7))
- ldxi(23, _SP_REGNO, 64);
- if (jit_regset_tstbit(&_jitc->function->regset, _S8))
- ldxi(24, _SP_REGNO, 72);
- if (jit_regset_tstbit(&_jitc->function->regset, _S9))
- ldxi(25, _SP_REGNO, 80);
- if (jit_regset_tstbit(&_jitc->function->regset, _S10))
- ldxi(26, _SP_REGNO, 88);
- if (jit_regset_tstbit(&_jitc->function->regset, _S11))
- ldxi(27, _SP_REGNO, 96);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
- ldxi_d(8, _SP_REGNO, 104);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
- ldxi_d(9, _SP_REGNO, 112);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
- ldxi_d(18, _SP_REGNO, 120);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
- ldxi_d(19, _SP_REGNO, 128);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
- ldxi_d(20, _SP_REGNO, 136);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
- ldxi_d(21, _SP_REGNO, 144);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
- ldxi_d(22, _SP_REGNO, 152);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
- ldxi_d(23, _SP_REGNO, 160);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
- ldxi_d(24, _SP_REGNO, 168);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
- ldxi_d(25, _SP_REGNO, 176);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
- ldxi_d(26, _SP_REGNO, 184);
- if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
- ldxi_d(27, _SP_REGNO, 192);
- addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+ if (_jitc->function->need_frame) {
+ movr(_SP_REGNO, _FP_REGNO);
+ ldxi(_RA_REGNO, _SP_REGNO, 0);
+ ldxi(_FP_REGNO, _SP_REGNO, 8);
+ }
+
+ /* callee save registers */
+ for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_word_t);
+ }
+ }
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ addi(_SP_REGNO, _SP_REGNO, jit_framesize());
RET();
}
assert(_jitc->function->self.call & jit_call_varargs);
/* Initialize va_list to the first stack argument. */
if (jit_arg_reg_p(_jitc->function->vagp))
- addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+ addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
else
- addi(r0, _FP_REGNO, _jitc->function->self.size);
+ addi(r0, _FP_REGNO, jit_selfsize());
}
static void
}
else
abort();
- i.w = u.i[1];
assert(i.I.opcode == 3 && i.I.funct3 == 3); /* LD */
}
# else
/*
- * Copyright (C) 2019-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 168
0, /* data */
0, /* live */
4, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
8, /* va_arg_d */
12, /* movi */
12, /* movnr */
12, /* movzr */
+ 28, /* casr */
+ 40, /* casi */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
8, /* extr_us */
4, /* extr_i */
8, /* extr_ui */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 116, /* bswapr_ul */
20, /* htonr_us */
52, /* htonr_ui */
116, /* htonr_ul */
4, /* callr */
16, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
4, /* movr_w_f */
0, /* movr_ww_d */
4, /* movr_w_d */
- 0, /* movr_f_w */
+ 4, /* movr_f_w */
4, /* movi_f_w */
0, /* movr_d_ww */
0, /* movi_d_ww */
4, /* movr_d_w */
12, /* movi_d_w */
- 20, /* bswapr_us */
- 52, /* bswapr_ui */
- 116, /* bswapr_ul */
- 28, /* casr */
- 40, /* casi */
+ 168, /* clo */
+ 148, /* clz */
+ 168, /* cto */
+ 148, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2019-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* Paulo Cesar Pereira de Andrade
*/
+/* callee save + variadic arguments
+ * align16(ra+fp+s[1-9]+s10+s11+fs[0-9]+fs10+fs11)+align16(a[0-7]) */
+#define stack_framesize (208 + 64)
+
#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8)
#define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8)
/*
* Prototypes
*/
+#define compute_framesize() _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
#if __WORDSIZE == 64
# define load_const(r0, i0) _load_const(_jit, r0, i0)
static void _load_const(jit_state_t*, jit_int32_t, jit_word_t);
#define PROTO 1
# include "jit_riscv-cpu.c"
# include "jit_riscv-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
{ _NOREG, "<none>" },
};
+static jit_int32_t iregs[] = {
+ _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10, _S11
+};
+
+static jit_int32_t fregs[] = {
+ _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7, _FS8, _FS9, _FS10, _FS11
+};
+
/*
* Implementation
*/
_jit_allocai(jit_state_t *_jit, jit_int32_t length)
{
assert(_jitc->function);
+ jit_check_frame();
switch (length) {
case 0: case 1: break;
case 2: _jitc->function->self.aoff &= -2; break;
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u);
+ jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
- return (jit_arg_f_reg_p(u->u.w));
+ return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
}
void
_jit_ellipsis(jit_state_t *_jit)
{
jit_inc_synth(ellipsis);
+ jit_check_frame();
if (_jitc->prepare) {
jit_link_prepare();
assert(!(_jitc->function->call.call & jit_call_varargs));
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
+ jit_check_frame();
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_f, offset,
++_jitc->function->self.argn);
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_d, offset,
++_jitc->function->self.argn);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_c(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_uc(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_s(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_us(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_i(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_i(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_ui(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_l(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(JIT_RA0 - v->u.w, u);
- else
- jit_stxi(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(JIT_RA0 - v->u.w, u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(v->u.w, JIT_FP, regno);
+ node = jit_stxi(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_movr_f(u, JIT_FA0 - v->u.w);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
- else
- jit_ldxi_f(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_movr_f(JIT_FA0 - v->u.w, u);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
- else
- jit_stxi_f(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_inc_synth_fp(putargi_f, u, v);
if (jit_arg_f_reg_p(v->u.w))
jit_movi_f(JIT_FA0 - v->u.w, u);
- else if (jit_arg_reg_p(v->u.w - 8)) {
- union {
- jit_float32_t f;
- jit_int32_t i;
- } uu;
- uu.f = u;
- jit_movi(JIT_RA0 - (v->u.w - 8), uu.i);
- }
+ else if (jit_arg_reg_p(v->u.w - 8))
+ jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(v->u.w, JIT_FP, regno);
+ node = jit_stxi_f(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_movr_d(u, JIT_FA0 - v->u.w);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
- else
- jit_ldxi_d(u, JIT_FP, v->u.w);
+ else {
+ jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_movr_d(JIT_FA0 - v->u.w, u);
else if (jit_arg_reg_p(v->u.w - 8))
jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
- else
- jit_stxi_d(v->u.w, JIT_FP, u);
+ else {
+ jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
jit_inc_synth_dp(putargi_d, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi_d(JIT_FA0 - v->u.w, u);
- else if (jit_arg_reg_p(v->u.w - 8)) {
- union {
- jit_float64_t d;
- jit_int64_t w;
- } uu;
- uu.d = u;
- jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
- }
+ else if (jit_arg_reg_p(v->u.w - 8))
+ jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
else {
+ jit_node_t *node;
regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(v->u.w, JIT_FP, regno);
+ node = jit_stxi_d(v->u.w, JIT_FP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
else {
jit_stxi(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
jit_stxi(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
else {
jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
else {
jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
jit_unget_reg(regno);
_jitc->function->call.size += sizeof(jit_word_t);
+ jit_check_frame();
}
jit_dec_synth();
}
{
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishr, r0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
{
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishi, (jit_word_t)i0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
jit_node_t *node;
jit_uint8_t *data;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+#define clor(r0, r1) fallback_clo(r0, r1)
+#define clzr(r0, r1) fallback_clz(r0, r1)
+#define ctor(r0, r1) fallback_cto(r0, r1)
+#define ctzr(r0, r1) fallback_ctz(r0, r1)
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
case_brr(bunord, _d);
case_brd(bunord);
case jit_code_jmpr:
+ jit_check_frame();
jmpr(rn(node->u.w));
break;
case jit_code_jmpi:
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (simm20_p(word))
+ word = jmpi(_jit->pc.w);
+ else
word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
jmpi(node->u.w);
+ }
break;
case jit_code_callr:
+ jit_check_frame();
callr(rn(node->u.w));
break;
case jit_code_calli:
if (temp->flag & jit_flag_patch)
calli(temp->u.w);
else {
- word = calli_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (simm20_p(word))
+ word = calli(_jit->pc.w);
+ else
+ word = calli_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
calli(node->u.w);
+ }
break;
case jit_code_prolog:
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
undo.patch_offset = _jitc->patches.offset;
restart_function:
+ compute_framesize();
+ patch_alist(0);
_jitc->again = 0;
prolog(node);
break;
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ undo.func.need_frame = _jitc->function->need_frame;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ /* this will be recomputed but undo anyway to have it
+ * better self documented.*/
+ undo.func.need_stack = _jitc->function->need_stack;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
_jitc->patches.offset = undo.patch_offset;
+ patch_alist(1);
goto restart_function;
}
/* remember label is defined */
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+ case jit_code_arg_l:
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_i: case jit_code_getarg_ui:
case jit_code_getarg_l:
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
#define CODE 1
# include "jit_riscv-cpu.c"
# include "jit_riscv-fpu.c"
+# include "jit_fallback.c"
#undef CODE
static void
stxi_d(i0, rn(r0), rn(r1));
}
+#if __WORDSIZE != 64
+# error "only 64 bit ports tested"
+#endif
+/*
+ * Compute the size of the stack frame of the current function and
+ * leave it, rounded up to a multiple of 16, in _jitc->framesize.
+ */
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t idx;
+
+    /* ra+fp */
+    _jitc->framesize = 16;
+
+    /* One word for each iregs[] entry that is set in the function regset */
+    for (idx = 0; idx < jit_size(iregs); ++idx) {
+        if (jit_regset_tstbit(&_jitc->function->regset, iregs[idx])) {
+            _jitc->framesize += sizeof(jit_word_t);
+        }
+    }
+
+    /* One double for each fregs[] entry that is set in the function regset */
+    for (idx = 0; idx < jit_size(fregs); ++idx) {
+        if (jit_regset_tstbit(&_jitc->function->regset, fregs[idx])) {
+            _jitc->framesize += sizeof(jit_float64_t);
+        }
+    }
+
+    /* Space to store variadic arguments */
+    if (_jitc->function->self.call & jit_call_varargs) {
+        _jitc->framesize += (8 - _jitc->function->vagp) * 8;
+    }
+
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
static void
_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
{
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# define EAR(R1,R2) RRE_(0xB24F,R1,R2)
/* EXTRACT PSW */
# define EPSW(R1,R2) RRE_(0xB98D,R1,R2)
+/* FIND LEFTMOST ONE */
+# define FLOGR(R1,R2) RRE_(0xB983,R1,R2)
/* INSERT CHARACTER */
# define IC(R1,D2,X2,B2) RX_(0x43,R1,X2,B2,D2)
# define ICY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x73)
static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
# define movi_p(r0,i0) _movi_p(_jit,r0,i0)
static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1)
-# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1)
-# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1)
+# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+# if __WORDSIZE == 64
+#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#endif
# define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2)
static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
# define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2)
# if __WORDSIZE == 32
# define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2)
static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-# else
-# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2)
-# endif
-# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
+# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0)
static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-# if __WORDSIZE == 32
# define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2)
static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-# else
-# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2)
-# endif
-# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
+/* No trailing ';' in the expansion: the call site supplies it, so that
+ * rshi() stays a single statement, exactly like lshi() and rshi_u(). */
+# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0)
static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-# if __WORDSIZE == 32
# define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2)
static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# else
+# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2)
+# define lshi(r0,r1,i0) SLLG(r0,r1,i0,0)
+# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2)
+# define rshi(r0,r1,i0) SRAG(r0,r1,i0,0)
# define rshr_u(r0,r1,r2) SRLG(r0,r1,0,r2)
+# define rshi_u(r0,r1,i0) SRLG(r0,r1,i0,0)
# endif
-# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0)
-static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
# if __WORDSIZE == 32
# define negr(r0,r1) LCR(r0,r1)
# else
# define negr(r0,r1) LCGR(r0,r1)
# endif
+# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define comr(r0,r1) _comr(_jit,r0,r1)
static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
# define andr(r0,r1,r2) _andr(_jit,r0,r1,r2)
# define bmci(i0,r0,i1) bmxi(CC_E,i0,r0,i1)
# define bmci_p(i0,r0,i1) bmxi_p(CC_E,i0,r0,i1)
# define jmpr(r0) BR(r0)
-# define jmpi(i0) _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+# define jmpi(i0,i1) _jmpi(_jit,i0,i1)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t, jit_bool_t);
# define jmpi_p(i0) _jmpi_p(_jit,i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
# define callr(r0) BALR(_R14_REGNO,r0)
-# define calli(i0) _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+# define calli(i0,i1) _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t, jit_bool_t);
# define calli_p(i0) _calli_p(_jit,i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
# define prolog(i0) _prolog(_jit,i0)
patch_at(w, _jit->pc.w);
}
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ LRVR(r0, r1); /* r0 = r1, presumably with the low 32 bits byte reversed -- confirm */
+ SRL(r0, 16, 0); /* shift r0 right by 16 (immediate amount, no count register) */
+ LLGHR(r0, r0); /* presumably zero extends the low halfword of r0 -- confirm */
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ LRVR(r0, r1); /* r0 = r1, presumably with the low 32 bits byte reversed -- confirm */
+# if __WORDSIZE == 64
+ LLGFR(r0, r0); /* 64 bit only; presumably zero extends the low word of r0 -- confirm */
+# endif
+}
+
+#if __WORDSIZE == 64
+static void
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ LRVGR(r0, r1); /* single instruction; presumably the 64 bit byte reversing load -- confirm */
+}
+#endif
+
static void
_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
SLL(r0, 0, r2);
}
}
-#endif
 static void
 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
- jit_int32_t reg;
- reg = jit_get_reg_but_zero(0);
- movi(rn(reg), i0);
- lshr(r0, r1, rn(reg));
- jit_unget_reg_but_zero(reg);
+ movr(r0, r1); /* SLL below works in place on r0, so copy the source first */
+ SLL(r0, i0, 0); /* i0 is passed directly as the shift operand; no scratch register is allocated anymore */
 }
-# if __WORDSIZE == 32
static void
_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
SRA(r0, 0, r2);
}
}
-#endif
 static void
 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
- jit_int32_t reg;
- reg = jit_get_reg_but_zero(0);
- movi(rn(reg), i0);
- rshr(r0, r1, rn(reg));
- jit_unget_reg_but_zero(reg);
+ movr(r0, r1); /* SRA below works in place on r0, so copy the source first */
+ SRA(r0, i0, 0); /* i0 is passed directly as the shift operand; no scratch register is allocated anymore */
 }
-# if __WORDSIZE == 32
static void
_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
SRL(r0, 0, r2);
}
}
-#endif
 static void
 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
- jit_int32_t reg;
- reg = jit_get_reg_but_zero(0);
- movi(rn(reg), i0);
- rshr_u(r0, r1, rn(reg));
- jit_unget_reg_but_zero(reg);
+ movr(r0, r1); /* SRL below works in place on r0, so copy the source first */
+ SRL(r0, i0, 0); /* i0 is passed directly as the shift operand; no scratch register is allocated anymore */
+}
+#endif
+
+/*
+ * Emit code that stores in r0 the value of r1 with its bit order
+ * reversed. Groups of 1, 2, 4 and 8 bits (and, depending on the word
+ * size, 16 and 32 bits) are exchanged with their neighbour in turn,
+ * using three scratch registers: t0 holds the current mask, t1 and t2
+ * the two halves being exchanged.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t t0, t1, t2;
+    /* Work in place on r0; done before allocating the scratch registers */
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    /* Exchange adjacent bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    /* Exchange adjacent pairs of bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    /* Exchange adjacent nibbles */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    /* Exchange adjacent bytes */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# if __WORDSIZE == 32
+    /* Last step: exchange the two halfwords, no mask required */
+    rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16); /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# else
+    /* Exchange adjacent halfwords */
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+    /* Last step: exchange the two words, no mask required */
+    rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32); /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */
+# endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Count leading ones of r1 into r0: complement r1 and count the
+ * leading zeros of the result. When CHECK_FLOGR is enabled and the
+ * cpu was found to lack flogr, the generic fallback is emitted instead.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_clo(r0, r1);
+        return;
+    }
+#endif
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/*
+ * Count leading zeros of r1 into r0 using FLOGR (find leftmost one) on
+ * a register pair. When CHECK_FLOGR is enabled and the cpu was found to
+ * lack flogr, the generic fallback is emitted instead.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t pair;
+#if __WORDSIZE == 32
+    jit_word_t skip;
+#endif
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_clz(r0, r1);
+        return;
+    }
+#endif
+    pair = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    /* Operand is placed 32 bits to the left before searching */
+    SLLG(rn(pair), r1, 32, 0);
+#else
+    movr(rn(pair), r1);
+#endif
+    FLOGR(rn(pair), rn(pair));
+    movr(r0, rn(pair));
+#if __WORDSIZE == 32
+    /* Results up to 31 are final; otherwise halve the result */
+    skip = blei_p(_jit->pc.w, r0, 31);
+    rshi(r0, r0, 1); /* r0 is 64 */
+    patch_at(skip, _jit->pc.w);
+#endif
+    jit_unget_reg_pair(pair);
+}
+
+/*
+ * Count trailing ones of r1 into r0: reverse the bits of r1 and count
+ * the leading ones of the result. When CHECK_FLOGR is enabled and the
+ * cpu was found to lack flogr, the generic fallback is emitted instead.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_cto(r0, r1);
+        return;
+    }
+#endif
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+/*
+ * Count trailing zeros of r1 into r0: reverse the bits of r1 and count
+ * the leading zeros of the result. When CHECK_FLOGR is enabled and the
+ * cpu was found to lack flogr, the generic fallback is emitted instead.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (!jit_cpu.flogr) {
+        fallback_ctz(r0, r1);
+        return;
+    }
+#endif
+    bitswap(r0, r1);
+    clzr(r0, r0);
 }
static void
}
#endif
-static void
-_jmpi(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
{
- jit_word_t d;
jit_int32_t reg;
- d = (i0 - _jit->pc.w) >> 1;
- if (s16_p(d))
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 1;
+ if (i1 && s16_p(d))
J(x16(d));
else if (s32_p(d))
BRL(d);
jmpr(rn(reg));
jit_unget_reg_but_zero(reg);
}
+ return (w);
}
static jit_word_t
return (w);
}
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
{
- jit_word_t d;
jit_int32_t reg;
- d = (i0 - _jit->pc.w) >> 1;
- if (s32_p(d))
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 1;
+ if (i1 && s16_p(d))
+ BRAS(_R14_REGNO, x16(d));
+ else if (s32_p(d))
BRASL(_R14_REGNO, d);
else {
reg = jit_get_reg_but_zero(0);
callr(rn(reg));
jit_unget_reg_but_zero(reg);
}
+ return (w);
}
static jit_word_t
u.s[7] = i1.s;
#endif
}
- /* BRC */
+ /* BRC or BRAS */
else if (i0.b.op == 0xA7) {
- assert(i0.b.r3 == 0x4);
+ assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
d = (label - instr) >> 1;
assert(s16_p(d));
i1.b.i2 = d;
u.s[1] = i1.s;
}
- /* BRCL */
+ /* BRCL or BRASL */
else if (i0.b.op == 0xC0) {
- assert(i0.b.r3 == 0x4);
+ assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
d = (label - instr) >> 1;
assert(s32_p(d));
i12.i = d;
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
-
#if __WORDSIZE == 32
-#define JIT_INSTR_MAX 94
+#define JIT_INSTR_MAX 164
0, /* data */
0, /* live */
- 2, /* align */
+ 4, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
2, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
40, /* va_start */
- 86, /* va_arg */
- 82, /* va_arg_d */
+ 82, /* va_arg */
+ 78, /* va_arg_d */
0, /* va_end */
4, /* addr */
12, /* addi */
14, /* rsbi */
6, /* mulr */
14, /* muli */
- 46, /* qmulr */
- 50, /* qmuli */
+ 38, /* qmulr */
+ 42, /* qmuli */
10, /* qmulr_u */
18, /* qmuli_u */
10, /* divr */
4, /* xorr */
12, /* xori */
8, /* lshr */
- 10, /* lshi */
+ 6, /* lshi */
8, /* rshr */
- 10, /* rshi */
+ 6, /* rshi */
8, /* rshr_u */
- 10, /* rshi_u */
+ 6, /* rshi_u */
2, /* negr */
8, /* comr */
16, /* ltr */
8, /* movi */
14, /* movnr */
14, /* movzr */
+ 22, /* casr */
+ 28, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 12, /* bswapr_us */
+ 4, /* bswapr_ui */
+ 0, /* bswapr_ul */
4, /* htonr_us */
2, /* htonr_ui */
0, /* htonr_ul */
8, /* bxsubr_u */
12, /* bxsubi_u */
2, /* jmpr */
- 10, /* jmpi */
+ 6, /* jmpi */
2, /* callr */
- 10, /* calli */
+ 6, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 38, /* bswapr_us */
- 94, /* bswapr_ui */
- 0, /* bswapr_ul */
- 22, /* casr */
- 28, /* casi */
+ 36, /* clo */
+ 28, /* clz */
+ 164, /* cto */
+ 158, /* ctz */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 300
+#define JIT_INSTR_MAX 280
0, /* data */
0, /* live */
- 6, /* align */
+ 20, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
2, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
44, /* va_start */
- 104, /* va_arg */
- 100, /* va_arg_d */
+ 100, /* va_arg */
+ 96, /* va_arg_d */
0, /* va_end */
8, /* addr */
24, /* addi */
28, /* rsbi */
8, /* mulr */
24, /* muli */
- 60, /* qmulr */
- 68, /* qmuli */
+ 52, /* qmulr */
+ 60, /* qmuli */
16, /* qmulr_u */
32, /* qmuli_u */
12, /* divr */
8, /* xorr */
24, /* xori */
6, /* lshr */
- 10, /* lshi */
+ 6, /* lshi */
6, /* rshr */
- 10, /* rshi */
+ 6, /* rshi */
6, /* rshr_u */
- 10, /* rshi_u */
+ 6, /* rshi_u */
4, /* negr */
12, /* comr */
20, /* ltr */
16, /* movi */
18, /* movnr */
18, /* movzr */
+ 30, /* casr */
+ 42, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
4, /* extr_i */
4, /* extr_ui */
+ 12, /* bswapr_us */
+ 8, /* bswapr_ui */
+ 4, /* bswapr_ul */
4, /* htonr_us */
4, /* htonr_ui */
4, /* htonr_ul */
6, /* ldr_c */
18, /* ldi_c */
6, /* ldr_uc */
- 18, /* ldi_uc */
+ 22, /* ldi_uc */
6, /* ldr_s */
18, /* ldi_s */
6, /* ldr_us */
14, /* ldxr_l */
26, /* ldxi_l */
4, /* str_c */
- 16, /* sti_c */
+ 20, /* sti_c */
4, /* str_s */
16, /* sti_s */
4, /* str_i */
10, /* bxsubr_u */
14, /* bxsubi_u */
2, /* jmpr */
- 18, /* jmpi */
+ 6, /* jmpi */
2, /* callr */
- 18, /* calli */
+ 14, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 60, /* bswapr_us */
- 140, /* bswapr_ui */
- 300, /* bswapr_ul */
- 30, /* casr */
- 42, /* casi */
+ 24, /* clo */
+ 12, /* clz */
+ 280, /* cto */
+ 272, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* Authors:
* Paulo Cesar Pereira de Andrade
*/
+#define CHECK_FLOGR 0
+
+#if CHECK_FLOGR
+#include <signal.h>
+#include <setjmp.h>
+#endif
#include <lightning.h>
#include <lightning/jit_private.h>
#define PROTO 1
# include "jit_s390-cpu.c"
# include "jit_s390-fpu.c"
+# if CHECK_FLOGR
+# include "jit_fallback.c"
+# endif
#undef PROTO
/*
* Initialization
*/
+jit_cpu_t jit_cpu;
jit_register_t _rvs[] = {
{ rc(gpr) | 0x0, "%r0" },
{ rc(gpr) | 0x1, "%r1" },
{ rc(fpr) | rc(arg) | 0x0, "%f0" },
{ _NOREG, "<none>" },
};
+#if CHECK_FLOGR
+static sigjmp_buf jit_env;
+#endif
/*
* Implementation
*/
+#if CHECK_FLOGR
+static void
+sigill_handler(int signum)
+{
+ jit_cpu.flogr = 0; /* the probe in jit_get_cpu() faulted: record flogr as unavailable */
+ siglongjmp(jit_env, 1); /* resume at the sigsetjmp() in jit_get_cpu(), which then returns 1 */
+}
+#endif
+
+/*
+ * Fill the global jit_cpu description; the only field set here is
+ * jit_cpu.flogr. With CHECK_FLOGR the instruction is executed once
+ * under a temporary SIGILL handler, unless SIGILL is being ignored, in
+ * which case no probe is made and jit_cpu.flogr is left untouched.
+ * Without CHECK_FLOGR the instruction is assumed to be available.
+ */
 void
 jit_get_cpu(void)
 {
+#if CHECK_FLOGR
+    int r12, r13;
+    struct sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    if (old_action.sa_handler != SIG_IGN) {
+        sigaction(SIGILL, &new_action, NULL);
+        if (!sigsetjmp(jit_env, 1)) {
+            jit_cpu.flogr = 1;
+            /* flogr %r12, %r12 */
+            /* NOTE(review): %0 and %1 are declared as outputs but are
+             * read by the first two lgr; presumably the intent is to
+             * save and restore %r12/%r13 around flogr -- confirm the
+             * operand order. */
+            __asm__ volatile("lgr %%r12, %0; lgr %%r13, %1;"
+                             "flogr %%r12, %%r12;"
+                             "lgr %1, %%r13; lgr %0, %%r12;"
+                             : "=r" (r12), "=r" (r13));
+        }
+        /* Restore the previous disposition on both paths. It used to be
+         * restored only when the probe succeeded, leaving sigill_handler
+         * (and its stale jit_env) installed after a SIGILL. */
+        sigaction(SIGILL, &old_action, NULL);
+    }
+#else
+    /* By default, assume it is available */
+    jit_cpu.flogr = 1;
+#endif
 }
void
}
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
- jit_inc_synth_w(retr, u);
+ jit_code_inc_synth_w(code, u); /* open the synthesized node with the caller supplied code instead of the former fixed retr */
 jit_movr(JIT_RET, u);
 jit_ret();
 jit_dec_synth();
 }
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) /* range test replaces the single jit_code_arg check; assumes the integer arg_* codes are contiguous up to jit_code_arg -- TODO confirm */
 return (jit_arg_reg_p(u->u.w));
 assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
 return (jit_arg_f_reg_p(u->u.w));
 }
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_word_t);
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, _R2 - v->u.w);
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, _R2 - v->u.w);
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, _R2 - v->u.w);
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, _R2 - v->u.w);
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w)) {
#if __WORDSIZE == 32
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, _R2 - v->u.w);
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, _R2 - v->u.w);
#endif
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(_R2 - v->u.w, u);
else
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(_R2 - v->u.w, u);
else {
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(_R2 - _jitc->function->call.argi, u);
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(_R2 - _jitc->function->call.argi, u);
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 1) & ~1);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(and,);
case_rrw(and,);
case_rrr(or,);
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
if (temp->flag & jit_flag_patch)
- jmpi(temp->u.w);
+ jmpi(temp->u.w, 1);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (s32_p(word)) {
+ offset = s16_p(word);
+ word = jmpi(_jit->pc.w, offset);
+ }
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
else
- jmpi(node->u.w);
+ jmpi(node->u.w, 1);
break;
case jit_code_callr:
callr(rn(node->u.w));
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
if (temp->flag & jit_flag_patch)
- calli(temp->u.w);
+ calli(temp->u.w, 1);
else {
- word = calli_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (s32_p(word)) {
+ offset =s16_p(word);
+ word = calli(_jit->pc.w, offset);
+ }
+ else
+ word = calli_p(_jit->pc.w);
patch(word, node);
}
}
else
- calli(node->u.w);
+ calli(node->u.w, 1);
break;
case jit_code_prolog:
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+# if __WORDSIZE == 64
+ case jit_code_arg_l:
+# endif
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+#if __WORDSIZE == 64
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
+#endif
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_ui: case jit_code_getarg_l:
#endif
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+#if __WORDSIZE == 64
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
+#endif
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
+#endif
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
#define CODE 1
# include "jit_s390-cpu.c"
# include "jit_s390-fpu.c"
+# if CHECK_FLOGR
+# include "jit_fallback.c"
+# endif
#undef CODE
void
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
*/
static jit_int16_t _szs[jit_code_last_code] = {
#if GET_JIT_SIZE
-# define JIT_INSTR_MAX 512
+# define JIT_INSTR_MAX 1024
#else
# if defined(__i386__) || defined(__x86_64__)
# include "jit_x86-sz.c"
break;
}
# endif
- size += _szs[node->code];
+ switch (node->code) {
+ /* These instructions are special because they can be arbitrarily long. */
+ case jit_code_align:
+ case jit_code_skip:
+ size += node->u.w;
+ break;
+ default:
+ size += _szs[node->code];
+ }
}
# if __riscv && __WORDSIZE == 64
/* Heuristically only 20% of constants are unique. */
{
#if GET_JIT_SIZE
FILE *fp;
- jit_word_t offset;
+ int offset;
/* Define a single path */
fp = fopen(JIT_SIZE_PATH, "a");
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
static void _f3a(jit_state_t*,jit_int32_t,
jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
maybe_unused;
+# define f2c1(op,rd,op3,rs1,opf,rs2) _f2c1(_jit,op,rd,op3,rs1,opf,rs2)
+static void
+_f2c1(jit_state_t*,jit_int32_t, jit_int32_t,
+ jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+ maybe_unused;
# define LDSB(rs1, rs2, rd) f3r(3, rd, 9, rs1, rs2)
# define LDSBI(rs1, imm, rd) f3i(3, rd, 9, rs1, imm)
# define LDSH(rs1, rs2, rd) f3r(3, rd, 10, rs1, rs2)
# define UNIMP(imm) f2r(0, 0, 0, imm)
# define FLUSH(rs1, rs2) f3r(2, 0, 59, rs1, rs2)
-# define FLUSHI(rs1, im) f3i(2, 0, 59, rs1, imm)
+/* The parameter was spelled 'im', leaving the 'imm' of the expansion unbound */
+# define FLUSHI(rs1, imm) f3i(2, 0, 59, rs1, imm)
+# define LZCNT(rs2, rd) f2c1(2, rd, 54, 0, 23, rs2)
# define nop(i0) _nop(_jit, i0)
static void _nop(jit_state_t*, jit_int32_t);
# define movr(r0, r1) _movr(_jit, r0, r1)
#define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0)
# define comr(r0, r1) XNOR(r1, 0, r0)
# define negr(r0, r1) NEG(r1, r0)
+# define bitswap(r0, r1) _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define addr(r0, r1, r2) ADD(r1, r2, r0)
# define addi(r0, r1, i0) _addi(_jit, r0, r1, i0)
static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
# define jmpr(r0) _jmpr(_jit, r0)
static void _jmpr(jit_state_t*,jit_int32_t);
# define jmpi(i0) _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
# define jmpi_p(i0) _jmpi_p(_jit, i0)
static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
# define callr(r0) _callr(_jit, r0)
static void _callr(jit_state_t*,jit_int32_t);
# define calli(i0) _calli(_jit, i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
# define calli_p(i0) _calli_p(_jit, i0)
static jit_word_t _calli_p(jit_state_t*,jit_word_t);
# define prolog(node) _prolog(_jit, node)
ii(v.v);
}
+/*
+ * Emit one instruction word assembled from the op, rd, op3, rs1, opf
+ * and rs2 fields (used by LZCNT). Each argument is asserted to fit in
+ * its field: 2 bits for op, 5 for rd, rs1 and rs2, 6 for op3 and 9 for
+ * opf, which together make the full 32 bit word.
+ */
+static void
+_f2c1(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2)
+{
+    jit_instr_t v;
+    assert(!(op & 0xfffffffc));
+    assert(!(rd & 0xffffffe0));
+    /* Used to test an undeclared 'res', which only built with NDEBUG */
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(opf & 0xfffffe00));
+    /* rs2 is a register number, same width as rd and rs1 */
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b = op;
+    v.rd.b = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.opf.b = opf;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
static void
_nop(jit_state_t *_jit, jit_int32_t i0)
{
jit_unget_reg(r1_reg);
}
+/*
+ * Emit code that leaves in r0 the value of r1 with its bit order reversed.
+ * The value is copied to r0 and then adjacent groups of 1, 2, 4, 8, 16
+ * (and, for 64 bit words, 32) bits are exchanged in turn.  Three temporary
+ * registers are used: t0 holds the mask of the current step, t1 and t2 the
+ * two shifted halves that are or'ed back into r0.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    /* Exchange odd and even bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);              /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));       /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));           /* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 1);            /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+    /* Exchange pairs of bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);              /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));       /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));           /* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 2);            /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+    /* Exchange nibbles */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);              /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));       /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));           /* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 4);            /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+    /* Exchange bytes */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);              /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));       /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));           /* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 8);            /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    /* Last step: exchange the two 16 bit halves, no mask needed */
+    rshi_u(rn(t1), r0, 16);             /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);               /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+#  else
+    /* Exchange 16 bit groups */
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);             /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));       /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));           /* t2 = v & t0 */
+    lshi(rn(t2), rn(t2), 16);           /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+    /* Last step: exchange the two 32 bit halves, no mask needed */
+    rshi_u(rn(t1), r0, 32);             /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);               /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));            /* v = t1 | t2 */
+#  endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Emit code for clo (count leading ones) of r1 into r0.  Without the
+ * lzcnt instruction the generic fallback is used; otherwise r1 is
+ * complemented into r0 and the leading zeros of the complement counted.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+        fallback_clo(r0, r1);
+        return;
+    }
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/*
+ * Emit code for clz (count leading zeros) of r1 into r0, using LZCNT
+ * when jit_cpu.lzcnt is set and the generic fallback otherwise.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+#if __WORDSIZE == 32
+        jit_word_t w;
+        /* Move the value to the upper half so that the count done by
+         * LZCNT starts at bit 31 of the original value. */
+        SLLXI(r1, 32, r0);
+        LZCNT(r0, r0);
+        /* A count above 31 is only adjusted by the shift below; any
+         * other result is already correct and skips it. */
+        w = blei(_jit->pc.w, r0, 31);
+        rshi(r0, r0, 1);        /* r0 is 64 */
+        patch_at(w, _jit->pc.w);
+#else
+        LZCNT(r1, r0);
+#endif
+    }
+    else
+        fallback_clz(r0, r1);
+}
+
+/*
+ * Emit code for cto (count trailing ones) of r1 into r0.  Without the
+ * lzcnt instruction the generic fallback is used; otherwise the bits of
+ * r1 are reversed into r0 and the leading ones of the result counted.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+        fallback_cto(r0, r1);
+        return;
+    }
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+/*
+ * Emit code for ctz (count trailing zeros) of r1 into r0.  Without the
+ * lzcnt instruction the generic fallback is used; otherwise the bits of
+ * r1 are reversed into r0 and the leading zeros of the result counted.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+        fallback_ctz(r0, r1);
+        return;
+    }
+    bitswap(r0, r1);
+    clzr(r0, r0);
+}
+
static void
_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
# if __WORDSIZE == 32
B(cc, (i0 - w) >> 2);
# else
- B(cc, (i0 - w) >> 2);
+ BP(cc, (i0 - w) >> 2);
# endif
NOP();
}
NOP();
}
-static void
+static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
jit_int32_t reg;
- w = (i0 - _jit->pc.w) >> 2;
- if (s22_p(w)) {
- BA(w);
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 2;
+ if (s22_p(d)) {
+ BA(d);
NOP();
}
else {
jmpr(rn(reg));
jit_unget_reg(reg);
}
+ return (w);
}
static jit_word_t
NOP();
}
-static void
+static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t w;
- w = (i0 - _jit->pc.w) >> 2;
- if (s30_p(w)) {
- CALLI(w);
+ jit_word_t d, w;
+ w = _jit->pc.w;
+ d = (i0 - w) >> 2;
+ if (s30_p(d)) {
+ CALLI(d);
NOP();
}
else
- (void)calli_p(i0);
+ w = calli_p(i0);
+ return (w);
}
static jit_word_t
{
if (_jitc->function->assume_frame)
return;
- /* (most) other backends do not save incoming arguments, so,
- * only save locals here */
+ if (_jitc->function->allocar)
+ subi(_SP_REGNO, _FP_REGNO, _jitc->function->stack);
if (jit_regset_tstbit(&_jitc->function->regset, _L0))
- ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0));
+ ldxi(_L0_REGNO, _SP_REGNO, _jitc->function->stack + OFF(0));
if (jit_regset_tstbit(&_jitc->function->regset, _L1))
- ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1));
+ ldxi(_L1_REGNO, _SP_REGNO, _jitc->function->stack + OFF(1));
if (jit_regset_tstbit(&_jitc->function->regset, _L2))
- ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2));
+ ldxi(_L2_REGNO, _SP_REGNO, _jitc->function->stack + OFF(2));
if (jit_regset_tstbit(&_jitc->function->regset, _L3))
- ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3));
+ ldxi(_L3_REGNO, _SP_REGNO, _jitc->function->stack + OFF(3));
if (jit_regset_tstbit(&_jitc->function->regset, _L4))
- ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4));
+ ldxi(_L4_REGNO, _SP_REGNO, _jitc->function->stack + OFF(4));
if (jit_regset_tstbit(&_jitc->function->regset, _L5))
- ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5));
+ ldxi(_L5_REGNO, _SP_REGNO, _jitc->function->stack + OFF(5));
if (jit_regset_tstbit(&_jitc->function->regset, _L6))
- ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6));
+ ldxi(_L6_REGNO, _SP_REGNO, _jitc->function->stack + OFF(6));
if (jit_regset_tstbit(&_jitc->function->regset, _L7))
- ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7));
+ ldxi(_L7_REGNO, _SP_REGNO, _jitc->function->stack + OFF(7));
RESTOREI(0, 0, 0);
RETL();
NOP();
else
abort();
}
+ else if (i.op.b == 1) {
+ assert(s30_p((label - instr) >> 2));
+ i.disp30.b = (label - instr) >> 2;
+ u.i[0] = i.v;
+ }
else
abort();
}
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
}
# if __WORDSIZE == 64
+/* Handle the special case of using all float registers, as exercised
+ * in check/carg.c.
+ * For example:
+ * putargr_f JIT_F0 $ARG
+ * where JIT_F0 is %f32 and $ARG is %f31 and if %f30 (the mapping for %f31)
+ * is live, the jit_get_reg() call might return %f30, but, because it is
+ * live, will spill/reload it, generating assembly:
+ *
+ * std %f30, [ %fp + OFFS ]
+ * fmovd %f32, %f30
+ * fmovs %f30, %f31
+ * ldd [ %fp + OFFS ], %f30
+ *
+ * which basically becomes a noop, as the reload restores the old value.
+ */
+#define get_sng_reg(u)          _get_sng_reg(_jit, u)
+/*
+ * Return a CLASS_SNG temporary to be used when moving to/from r0.
+ * The caller releases it with jit_unget_reg().
+ */
+static jit_int32_t
+_get_sng_reg(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t first, reg;
+    /* First choice: a register that does not need to be spilled. */
+    reg = jit_get_reg(CLASS_SNG | jit_class_nospill | jit_class_chk);
+    if (reg != JIT_NOREG)
+        return (reg);
+    /* None available, so accept one that must be spilled. */
+    reg = jit_get_reg(CLASS_SNG);
+    if (rn(reg) != r0 - 1)
+        return (reg);
+    /* Special condition: the register just obtained is r0 - 1.  Hold on
+     * to it while allocating another one, so that a different register
+     * is returned, and only then release it.  This generates uglier
+     * machine code (a double spill/reload, the first one being a noop),
+     * but it works. */
+    first = reg;
+    reg = jit_get_reg(CLASS_SNG);
+    jit_unget_reg(first);
+    return (reg);
+}
+
static void
_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
if (single_precision_p(r1))
FMOVS(r1, r0);
else {
- t1 = jit_get_reg(CLASS_SNG);
+ t1 = get_sng_reg(r0);
movr_d(rn(t1), r1);
FMOVS(rn(t1), r0);
jit_unget_reg(t1);
}
else {
if (single_precision_p(r1)) {
- t0 = jit_get_reg(CLASS_SNG);
+ t0 = get_sng_reg(r0);
FMOVS(r1, rn(t0));
movr_d(r0, rn(t0));
jit_unget_reg(t0);
}
else {
- t1 = jit_get_reg(CLASS_SNG);
+ t1 = get_sng_reg(r0);
movr_d(rn(t1), r1);
FMOVS(rn(t1), rn(t1));
movr_d(r0, rn(t1));
assert(_jitc->function->self.call & jit_call_varargs);
/* Load argument. */
+#if __WORDSIZE == 64
ldr_d(r0, r1);
+#else
+ ldr_f(r0, r1);
+ ldxi_f(r0 + 1, r1, 4);
+#endif
/* Update vararg stack pointer. */
addi(r1, r1, 8);
#if __WORDSIZE == 32
-#define JIT_INSTR_MAX 52
+#define JIT_INSTR_MAX 180
0, /* data */
0, /* live */
0, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
0, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
- 8, /* va_arg_d */
+ 12, /* va_arg_d */
0, /* va_end */
4, /* addr */
12, /* addi */
8, /* movi */
16, /* movnr */
16, /* movzr */
+ 24, /* casr */
+ 32, /* casi */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
8, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 0, /* bswapr_ul */
8, /* htonr_us */
4, /* htonr_ui */
0, /* htonr_ul */
12, /* bxsubr_u */
12, /* bxsubi_u */
8, /* jmpr */
- 16, /* jmpi */
+ 8, /* jmpi */
8, /* callr */
- 16, /* calli */
+ 8, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 20, /* bswapr_us */
- 52, /* bswapr_ui */
- 0, /* bswapr_ul */
- 24, /* casr */
- 32, /* casi */
+ 176, /* clo */
+ 148, /* clz */
+ 180, /* cto */
+ 152, /* ctz */
#endif /* __WORDSIZE */
#if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 216
0, /* data */
0, /* live */
- 4, /* align */
+ 24, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
4, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
4, /* va_start */
8, /* va_arg */
8, /* va_arg_d */
24, /* movi */
16, /* movnr */
16, /* movzr */
+ 24, /* casr */
+ 44, /* casi */
8, /* extr_c */
4, /* extr_uc */
8, /* extr_s */
8, /* extr_us */
8, /* extr_i */
8, /* extr_ui */
+ 20, /* bswapr_us */
+ 52, /* bswapr_ui */
+ 116, /* bswapr_ul */
8, /* htonr_us */
8, /* htonr_ui */
4, /* htonr_ul */
4, /* ldr_c */
24, /* ldi_c */
4, /* ldr_uc */
- 24, /* ldi_uc */
+ 28, /* ldi_uc */
4, /* ldr_s */
- 24, /* ldi_s */
+ 28, /* ldi_s */
4, /* ldr_us */
- 24, /* ldi_us */
+ 28, /* ldi_us */
4, /* ldr_i */
- 24, /* ldi_i */
+ 28, /* ldi_i */
4, /* ldr_ui */
- 24, /* ldi_ui */
+ 28, /* ldi_ui */
4, /* ldr_l */
- 24, /* ldi_l */
+ 28, /* ldi_l */
4, /* ldxr_c */
24, /* ldxi_c */
4, /* ldxr_uc */
4, /* ldxr_l */
24, /* ldxi_l */
4, /* str_c */
- 24, /* sti_c */
+ 28, /* sti_c */
4, /* str_s */
- 24, /* sti_s */
+ 28, /* sti_s */
4, /* str_i */
- 24, /* sti_i */
+ 28, /* sti_i */
4, /* str_l */
- 24, /* sti_l */
+ 28, /* sti_l */
4, /* stxr_c */
24, /* stxi_c */
4, /* stxr_s */
12, /* bxsubr_u */
12, /* bxsubi_u */
8, /* jmpr */
- 32, /* jmpi */
+ 8, /* jmpi */
8, /* callr */
- 32, /* calli */
+ 40, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
16, /* truncr_f_l */
20, /* extr_f */
12, /* extr_d_f */
- 16, /* movr_f */
+ 24, /* movr_f */
32, /* movi_f */
8, /* ldr_f */
- 28, /* ldi_f */
+ 32, /* ldi_f */
8, /* ldxr_f */
28, /* ldxi_f */
8, /* str_f */
- 28, /* sti_f */
+ 32, /* sti_f */
8, /* stxr_f */
28, /* stxi_f */
20, /* bltr_f */
20, /* bler_f */
44, /* blei_f */
28, /* beqr_f */
- 60, /* beqi_f */
+ 52, /* beqi_f */
20, /* bger_f */
44, /* bgei_f */
20, /* bgtr_f */
44, /* bgti_f */
20, /* bner_f */
- 44, /* bnei_f */
+ 60, /* bnei_f */
20, /* bunltr_f */
44, /* bunlti_f */
20, /* bunler_f */
4, /* movr_d */
32, /* movi_d */
4, /* ldr_d */
- 24, /* ldi_d */
+ 28, /* ldi_d */
4, /* ldxr_d */
24, /* ldxi_d */
4, /* str_d */
- 24, /* sti_d */
+ 28, /* sti_d */
4, /* stxr_d */
24, /* stxi_d */
12, /* bltr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 20, /* bswapr_us */
- 52, /* bswapr_ui */
- 116, /* bswapr_ul */
- 24, /* casr */
- 44, /* casi */
+ 216, /* clo */
+ 188, /* clz */
+ 204, /* cto */
+ 176, /* ctz */
#endif /* __WORDSIZE */
/*
- * Copyright (C) 2013-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* Paulo Cesar Pereira de Andrade
*/
+/* Handling SIGILL should not be done by Lightning itself. The code
+ * guarded by CHECK_LZCNT can be used as a sample; otherwise use another
+ * approach to set jit_cpu.lzcnt.
+ */
+#define CHECK_LZCNT 0
+
+#if CHECK_LZCNT
+#include <signal.h>
+#include <setjmp.h>
+#endif
+
#define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6)
#if __WORDSIZE == 32
# define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 5)
#define PROTO 1
# include "jit_sparc-cpu.c"
# include "jit_sparc-fpu.c"
+# include "jit_fallback.c"
#undef PROTO
/*
* Initialization
*/
+jit_cpu_t jit_cpu;
jit_register_t _rvs[] = {
{ 0x00, "%g0" },
{ 0x01, "%g1" },
# endif
{ _NOREG, "<none>" },
};
+#if CHECK_LZCNT
+sigjmp_buf jit_env;
+#endif
/*
* Implementation
*/
+#if CHECK_LZCNT
+/*
+ * SIGILL handler used only while probing for lzcnt: record that the
+ * instruction is not available and jump back to the sigsetjmp() point
+ * saved in jit_env.
+ */
+static void
+sigill_handler(int signum)
+{
+    (void)signum;               /* only ever installed for SIGILL */
+    jit_cpu.lzcnt = 0;
+    siglongjmp(jit_env, 1);
+}
+#endif
+
+/*
+ * Initialize jit_cpu.  With CHECK_LZCNT enabled, lzcnt support is probed
+ * by executing the instruction with a temporary SIGILL handler installed;
+ * otherwise lzcnt is assumed not to be available.
+ */
void
jit_get_cpu(void)
{
+#if CHECK_LZCNT
+    int g2;
+    struct sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    /* Do not probe if the application asked for SIGILL to be ignored */
+    if (old_action.sa_handler != SIG_IGN) {
+        sigaction(SIGILL, &new_action, NULL);
+        if (!sigsetjmp(jit_env, 1)) {
+            jit_cpu.lzcnt = 1;
+            /* lzcnt %g2, %g2 */
+            __asm__ volatile("mov %%g2, %0; .long 0xa3b0021; mov %0, %%g2"
+                             : "=r" (g2));
+        }
+        /* Restore the previous disposition on both paths: this point is
+         * also reached through siglongjmp() from sigill_handler, and the
+         * probe handler must not stay installed in that case. */
+        sigaction(SIGILL, &old_action, NULL);
+    }
+#else
+    jit_cpu.lzcnt = 0;
+#endif
}
void
_jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
_jitc->function->self.size = stack_framesize;
_jitc->function->self.argi = _jitc->function->self.argf =
- _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+ _jitc->function->self.alen = 0;
/* float conversion */
# if __WORDSIZE == 32
_jitc->function->self.aoff = -8;
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(retr, u);
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- jit_live(JIT_RET);
+ jit_code_inc_synth_w(code, u);
+ jit_movr(JIT_RET, u);
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
# if __WORDSIZE == 32
- if (u->code == jit_code_arg || u->code == jit_code_arg_f)
+ if ((u->code >= jit_code_arg_c && u->code <= jit_code_arg) ||
+ u->code == jit_code_arg_f)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_d);
return (jit_arg_d_reg_p(u->u.w));
# else
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f);
return (jit_arg_d_reg_p(u->u.w));
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
+ assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
if (jit_arg_reg_p(_jitc->function->self.argi))
offset = _jitc->function->self.argi++;
else {
offset = BIAS(_jitc->function->self.size);
_jitc->function->self.size += sizeof(jit_word_t);
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_c, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, _I0 + v->u.w);
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c);
jit_inc_synth_wp(getarg_uc, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, _I0 + v->u.w);
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_s, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, _I0 + v->u.w);
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s);
jit_inc_synth_wp(getarg_us, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, _I0 + v->u.w);
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w)) {
# if __WORDSIZE == 64
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, _I0 + v->u.w);
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l);
jit_inc_synth_wp(getarg_i, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, _I0 + v->u.w);
# endif
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(_I0 + v->u.w, u);
else
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movi(_I0 + v->u.w, u);
else {
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movr(_O0 + _jitc->function->call.argi, u);
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_movi(_O0 + _jitc->function->call.argi, u);
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
+ case jit_code_skip:
+ nop((node->u.w + 3) & ~3);
+ break;
case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
break;
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_brr(blt,);
case_brw(blt,);
case_brr(blt, _u);
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (s22_p(word >> 2))
+ word = jmpi(_jit->pc.w);
+ else
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
temp = node->u.n;
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
- word = calli_p(temp->u.w);
- if (!(temp->flag & jit_flag_patch))
+ if (temp->flag & jit_flag_patch)
+ calli(temp->u.w);
+ else {
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if (s30_p(word >> 2))
+ word = calli(_jit->pc.w);
+ else
+ word = calli_p(_jit->pc.w);
patch(word, node);
+ }
}
else
calli(node->u.w);
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+#if __WORDSIZE == 64
+ case jit_code_arg_l:
+#endif
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+#if __WORDSIZE == 64
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
+#endif
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_ui: case jit_code_getarg_l:
#endif
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+#if __WORDSIZE == 64
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
+#endif
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
+#endif
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
#define CODE 1
# include "jit_sparc-cpu.c"
# include "jit_sparc-fpu.c"
+# include "jit_fallback.c"
#undef CODE
void
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
# else
# define il(l) ii(l)
# endif
-# define patch_abs(instr, label) \
- *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
-# define patch_rel(instr, label) \
- *(jit_int32_t *)(instr - 4) = label - instr
-# define patch_rel_char(instr, label) \
- *(jit_int8_t *)(instr - 1) = label - instr
# define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b)
static void
_rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
#define addci(r0, r1, i0) _addci(_jit, r0, r1, i0)
static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-# define iaddxr(r0, r1) alur(X86_ADC, r0, r1)
+# define iaddxr(r0, r1) _iaddxr(_jit, r0, r1)
+static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
# define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2)
static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
# define iaddxi(r0, i0) alui(X86_ADC, r0, i0)
# define decr(r0, r1) _decr(_jit, r0, r1)
static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
# endif
+# define clor(r0, r1) _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define clzr(r0, r1) _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctor(r0, r1) _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+# define ctzr(r0, r1) _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
# define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2)
static void
_cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
# define imovi(r0, i0) _imovi(_jit, r0, i0)
static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
# define movi(r0, i0) _movi(_jit, r0, i0)
-static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
+static
+# if CAN_RIP_ADDRESS
+jit_word_t
+# else
+void
+# endif
+_movi(jit_state_t*, jit_int32_t, jit_word_t);
# define movi_p(r0, i0) _movi_p(_jit, r0, i0)
static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
# define movcr(r0, r1) _movcr(_jit, r0, r1)
# define jng(i0) jcc(X86_CC_NG, i0)
# define jg(i0) jcc(X86_CC_G, i0)
# define jnle(i0) jcc(X86_CC_NLE, i0)
-static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
# define jccs(code, i0) _jccs(_jit, code, i0)
# define jos(i0) jccs(X86_CC_O, i0)
# define jnos(i0) jccs(X86_CC_NO, i0)
# define jngs(i0) jccs(X86_CC_NG, i0)
# define jgs(i0) jccs(X86_CC_G, i0)
# define jnles(i0) jccs(X86_CC_NLE, i0)
-static void _jccs(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
# define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1)
-static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _jcr(jit_state_t*,
+ jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
# define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1)
-static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+static jit_word_t _jci(jit_state_t*,
+ jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
# define jci0(code, i0, r0) _jci0(_jit, code, i0, r0)
-static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
+static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
# define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1)
static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
# define blti(i0, r0, i1) _blti(_jit, i0, r0, i1)
# define jmpi_p(i0) jmpi(i0)
# endif
# define jmpsi(i0) _jmpsi(_jit, i0)
-static void _jmpsi(jit_state_t*, jit_uint8_t);
+static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
# define prolog(node) _prolog(_jit, node)
static void _prolog(jit_state_t*, jit_node_t*);
# define epilog(node) _epilog(_jit, node)
static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
# define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0)
static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
-# define patch_at(node, instr, label) _patch_at(_jit, node, instr, label)
-static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
+# define patch_at(instr, label) _patch_at(_jit, instr, label)
+static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
# if !defined(HAVE_FFSL)
# if __X32
# define ffsl(i) __builtin_ffs(i)
{
if (ri == _NOREG) {
if (rb == _NOREG) {
-#if __X32
- mrm(0x00, r7(rd), 0x05);
-#else
- mrm(0x00, r7(rd), 0x04);
- sib(_SCL1, 0x04, 0x05);
+ /* Use ms == _SCL8 to tell it is a %rip relative displacement */
+#if __X64
+ if (ms == _SCL8)
+#endif
+ mrm(0x00, r7(rd), 0x05);
+#if __X64
+ else {
+ mrm(0x00, r7(rd), 0x04);
+ sib(_SCL1, 0x04, 0x05);
+ }
#endif
ii(md);
}
}
}
+/*
+ * Emit an add with carry of r1 into r0.  The ADCX variant is compiled
+ * out (#if 0), so this currently always emits alur(X86_ADC, r0, r1).
+ */
+static void
+_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ /* FIXME: this is not doing what I did expect for the simple test case:
+ * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1)
+ * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1)
+ * mov $0x1, %r11d -- r11 = 1
+ * xor %rbx, %rbx -- rbx = 0
+ * (gdb) p $eflags
+ * $1 = [ PF ZF IF ]
+ * add %r11, %rax -- r11 = 0x10000000000000000 (0)
+ * does not fit in 64 bit ^
+ * (gdb) p $eflags
+ * $2 = [ CF PF AF ZF IF ]
+ * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1)
+ * (gdb) p $eflags
+ * $3 = [ CF PF AF ZF IF ]
+ * (gdb) p/x $r10
+ * $4 = 0xffffffffffffffff
+ * but, r10 should be zero, as it is:
+ * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
+ * FIXME: maybe should only use ADCX in the third operation onward, that
+ * is, after the first ADC? In either case, the add -1+0+carry should
+ * have used and consumed the carry? At least this is what is expected
+ * in Lightning...
+ */
+ /* NOTE(review): the disabled encoding below passes r1 as the reg
+ * operand and r0 as the r/m operand to rex() and mrm(), the opposite
+ * of what _clzr and _ctzr in this file do for their destination r0.
+ * In the listing above `adcx %r10, %rbx' would then update %rbx, not
+ * %r10, which may be what was observed -- confirm against the ADCX
+ * encoding before enabling this code.
+ */
+#if 0
+ /* Significantly longer instruction, but avoid cpu stalls as only
+ * the carry flag is used in a sequence. */
+ if (jit_cpu.adx) {
+ /* ADCX */
+ ic(0x66);
+ rex(0, WIDE, r1, _NOREG, r0);
+ ic(0x0f);
+ ic(0x38);
+ ic(0xf6);
+ mrm(0x03, r7(r1), r7(r0));
+ }
+ else
+#endif
+ alur(X86_ADC, r0, r1);
+}
+
static void
_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
{
_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+ if (
+#if 0
+ /* Do not mix ADC and ADCX */
+ !jit_cpu.adx &&
+#endif
+ can_sign_extend_int_p(i0)) {
movr(r0, r1);
iaddxi(r0, i0);
}
}
#endif
+/* Count leading ones of r1 into r0, built from two other operations:
+ * comr(r0, r1) (presumably the one's complement of r1 into r0) followed by
+ * a count of leading zeros of that result. */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1);
+ clzr(r0, r0);
+}
+
+/* Count leading zeros of r1 into r0; the result for r1 == 0 is __WORDSIZE.
+ * The same 0x0f 0xbd opcode is emitted in both cases: with jit_cpu.abm it
+ * is prefixed by 0xf3 (LZCNT), otherwise it is BSR and the result is fixed
+ * up by the code that follows. */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_word_t w, x;
+ /* LZCNT */
+ if (jit_cpu.abm)
+ ic(0xf3);
+ /* else BSR */
+ rex(0, WIDE, r0, _NOREG, r1);
+ ic(0x0f);
+ ic(0xbd);
+ mrm(0x3, r7(r0), r7(r1));
+ if (!jit_cpu.abm) {
+ /* jump if undefined: r1 == 0 */
+ /* the target given here is a placeholder; w is patched below */
+ w = jccs(X86_CC_E, _jit->pc.w);
+ /* count leading zeros */
+ rsbi(r0, r0, __WORDSIZE - 1);
+ /* done */
+ /* placeholder displacement; x is patched below */
+ x = jmpsi(_jit->pc.w);
+ /* if r1 == 0 */
+ patch_at(w, _jit->pc.w);
+ movi(r0, __WORDSIZE);
+ /* not undefined */
+ patch_at(x, _jit->pc.w);
+ }
+ /* LZCNT has defined behavior for value zero and count leading zeros */
+}
+
+/* Count trailing ones of r1 into r0, built from two other operations:
+ * comr(r0, r1) (presumably the one's complement of r1 into r0) followed by
+ * a count of trailing zeros of that result. */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ comr(r0, r1);
+ ctzr(r0, r0);
+}
+
+/* Count trailing zeros of r1 into r0; the result for r1 == 0 is __WORDSIZE.
+ * With jit_cpu.abm the 0xf3 prefix selects TZCNT, which defines the zero
+ * case itself.  Otherwise BSF is emitted; it sets ZF and leaves r0
+ * undefined when r1 is zero, so r0 is then replaced by __WORDSIZE, either
+ * with CMOVE from a preloaded temporary or with a movi that is jumped over
+ * when r1 was not zero. */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+ jit_word_t w;
+ jit_int32_t t0;
+ /* t0 is only assigned, and only read, on the !jit_cpu.abm paths */
+ if (!jit_cpu.abm) {
+ if (jit_cmov_p())
+ t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
+ else
+ t0 = _NOREG;
+ /* must be loaded before BSF so the flags it sets are not disturbed */
+ if (t0 != _NOREG)
+ movi(rn(t0), __WORDSIZE);
+ }
+ /* TZCNT */
+ if (jit_cpu.abm)
+ ic(0xf3);
+ /* else BSF */
+ rex(0, WIDE, r0, _NOREG, r1);
+ ic(0x0f);
+ ic(0xbc);
+ mrm(0x3, r7(r0), r7(r1));
+ if (!jit_cpu.abm) {
+ /* No conditional move or need spill/reload a temporary */
+ if (t0 == _NOREG) {
+ /* ZF set means r1 == 0: only then must r0 be overwritten, so
+ * skip the movi when ZF is clear (same sense as the CMOVE below
+ * and as the zero test in clzr) */
+ w = jccs(X86_CC_NE, _jit->pc.w);
+ movi(r0, __WORDSIZE);
+ patch_at(w, _jit->pc.w);
+ }
+ else {
+ /* CMOVE */
+ rex(0, WIDE, r0, _NOREG, rn(t0));
+ ic(0x0f);
+ ic(0x44);
+ mrm(0x3, r7(r0), r7(rn(t0)));
+ jit_unget_reg(t0);
+ }
+ }
+ /* TZCNT has defined behavior for value zero */
+}
+
static void
_cr(jit_state_t *_jit,
jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
ii(i0);
# if !__X64_32
}
+ else if (can_sign_extend_int_p(i0)) {
+ rex(0, 1, _NOREG, _NOREG, r0);
+ ic(0xc7);
+ ic(0xc0 | r7(r0));
+ ii(i0);
+ }
else {
rex(0, 1, _NOREG, _NOREG, r0);
ic(0xb8 | r7(r0));
#endif
}
+/* Load the immediate i0 into r0.
+ * When CAN_RIP_ADDRESS is set and i0 is within signed 32 bit reach of the
+ * current pc (checked with an extra 8 byte margin on either side), a
+ * "lea rel(%rip), %r0" is emitted and the address of its 0x8d opcode byte
+ * is returned.  Otherwise imovi() is used for a non zero value and
+ * ixorr(r0, r0) for zero; with CAN_RIP_ADDRESS the value returned is then
+ * the pc at entry.  Without CAN_RIP_ADDRESS nothing is returned. */
+#if CAN_RIP_ADDRESS
+static jit_word_t
+#else
static void
+#endif
_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
+#if CAN_RIP_ADDRESS
+ jit_word_t w, rel;
+ w = _jit->pc.w;
+ rel = i0 - (w + 8);
+ /* widen the distance by 8 in its own direction before the range check */
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ /* lea rel(%rip), %r0 */
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ /* return value points after the rex prefix, at the opcode */
+ w = _jit->pc.w;
+ ic(0x8d);
+ /* 5 more bytes follow the opcode: modrm and a 32 bit displacement;
+ * _SCL8 asks rx() for the %rip relative form */
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
if (i0)
imovi(r0, i0);
else
ixorr(r0, r0);
+#if CAN_RIP_ADDRESS
+ return (w);
+#endif
}
+/* Load i0 into r0 with the fixed size encoding: rex prefix, opcode
+ * 0xb8 | r7(r0), then the full word immediate written by il().
+ * Returns the address of the opcode byte (after the rex prefix) instead
+ * of the pc after the instruction. */
static jit_word_t
_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
+ jit_word_t w;
rex(0, WIDE, _NOREG, _NOREG, r0);
+ w = _jit->pc.w;
ic(0xb8 | r7(r0));
il(i0);
- return (_jit->pc.w);
+ return (w);
}
static void
_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xbe);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xbe);
_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xb6);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xb6);
_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xbf);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xbf);
_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x0f);
+ ic(0xb7);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x0f);
ic(0xb7);
_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x63);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
#if __X64
rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x63);
_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+# if !__X64_32
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x63);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x63);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
+# if __X64_32
+ ldr_i(r0, rn(reg));
+# else
ldr_ui(r0, rn(reg));
+# endif
jit_unget_reg(reg);
}
}
_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
- rex(0, 1, r0, _NOREG, _NOREG);
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x8b);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else if (can_sign_extend_int_p(i0)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x8b);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
}
else {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), i0);
+# if __X64_32
+ ldxr_i(r0, r1, rn(reg));
+# else
ldxr_ui(r0, r1, rn(reg));
+# endif
jit_unget_reg(reg);
}
}
_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 16 : rel + 16;
+ if (can_sign_extend_int_p(rel)) {
+ if (reg8_p(r0)) {
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x88);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else {
+ reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
+ movr(rn(reg), r0);
+ rex(0, 0, rn(reg), _NOREG, _NOREG);
+ ic(0x88);
+ rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ jit_unget_reg(reg);
+ }
+ }
+ else
+#endif
+ if (address_p(i0)) {
if (reg8_p(r0)) {
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x88);
_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ ic(0x66);
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x89);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
ic(0x66);
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x89);
_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, 0, r0, _NOREG, _NOREG);
+ ic(0x89);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
+ if (address_p(i0)) {
rex(0, 0, r0, _NOREG, _NOREG);
ic(0x89);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - _jit->pc.w;
+ rel = rel < 0 ? rel - 8 : rel + 8;
+ if (can_sign_extend_int_p(rel)) {
+ rex(0, WIDE, r0, _NOREG, _NOREG);
+ ic(0x89);
+ rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+ }
+ else
+#endif
if (can_sign_extend_int_p(i0)) {
- rex(0, 1, r0, _NOREG, _NOREG);
+ rex(0, WIDE, r0, _NOREG, _NOREG);
ic(0x89);
rx(r0, i0, _NOREG, _NOREG, _SCL1);
}
}
#endif
-static void
+static jit_word_t
_jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
{
+ jit_word_t d;
jit_word_t w;
+ w = _jit->pc.w;
+ d = i0 - (w + 1);
ic(0x70 | code);
- w = i0 - (_jit->pc.w + 1);
- ic(w);
+ ic(d);
+ return (w);
}
+/* Emit a near conditional jump (0x0f, 0x80 | code, 32 bit displacement)
+ * to i0 and return the address of its first opcode byte. */
-static void
+static jit_word_t
_jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
{
+ jit_word_t d;
jit_word_t w;
+ w = _jit->pc.w;
ic(0x0f);
+ /* relative to the end of the 6 byte instruction that starts at w */
+ d = i0 - (w + 6);
ic(0x80 | code);
- w = i0 - (_jit->pc.w + 4);
- ii(w);
+ ii(d);
+ return (w);
}
+/* Compare r0 with r1 and emit a conditional jump to i0 on condition
+ * "code".  Returns what jcc() returns: the address of the jump, not of
+ * the compare before it. */
-static void
+static jit_word_t
_jcr(jit_state_t *_jit,
jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
alur(X86_CMP, r0, r1);
- jcc(code, i0);
+ return (jcc(code, i0));
}
+/* Compare r0 with the immediate i1 and emit a conditional jump to i0 on
+ * condition "code".  Returns what jcc() returns: the address of the jump,
+ * not of the compare before it. */
-static void
+static jit_word_t
_jci(jit_state_t *_jit,
jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
alui(X86_CMP, r0, i1);
- jcc(code, i0);
+ return (jcc(code, i0));
}
+/* Variant used for a comparison against zero: test r0 with itself and
+ * emit a conditional jump to i0 on condition "code".  Returns what jcc()
+ * returns: the address of the jump. */
-static void
+static jit_word_t
_jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
{
testr(r0, r0);
- jcc(code, i0);
+ return (jcc(code, i0));
}
+/* Branch to i0 if r0 < r1 (signed).  Returns the address of the emitted
+ * jump as given by jcr(), no longer the pc after it. */
static jit_word_t
_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_L, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_L, i0, r0, r1));
}
+/* Branch to i0 if r0 < i1 (signed).  For i1 == 0 the compare is replaced
+ * by a test of r0 with itself and a jump on the sign condition.
+ * Returns the address of the emitted jump. */
static jit_word_t
_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_L, i0, r0, i1);
- else jci0(X86_CC_S, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_L, i0, r0, i1);
+ else w = jci0(X86_CC_S, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 < r1 (unsigned, condition "below").  Returns the
+ * address of the emitted jump as given by jcr(). */
static jit_word_t
_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_B, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_B, i0, r0, r1));
}
+/* Branch to i0 if r0 < i1 (unsigned, condition "below").  For i1 == 0 a
+ * test of r0 with itself followed by the same condition is emitted.
+ * NOTE(review): an unsigned value is never below zero, so that jump is
+ * presumably never taken and only exists to have something to patch --
+ * confirm.  Returns the address of the emitted jump. */
static jit_word_t
_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_B, i0, r0, i1);
- else jci0(X86_CC_B, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_B, i0, r0, i1);
+ else w = jci0(X86_CC_B, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 <= r1 (signed).  When both operands are the same
+ * register the condition always holds and an unconditional jmpi() is
+ * emitted instead.  Returns the address reported by jmpi() or jcr(). */
static jit_word_t
_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_LE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_LE, i0, r0, r1);
+ return (w);
}
+/* Branch to i0 if r0 <= i1 (signed).  For i1 == 0 the compare is replaced
+ * by a test of r0 with itself, keeping the same condition.
+ * Returns the address of the emitted jump. */
static jit_word_t
_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_LE, i0, r0, i1);
- else jci0(X86_CC_LE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_LE, i0, r0, i1);
+ else w = jci0(X86_CC_LE, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 <= r1 (unsigned, condition "below or equal").  When
+ * both operands are the same register an unconditional jmpi() is emitted
+ * instead.  Returns the address reported by jmpi() or jcr(). */
static jit_word_t
_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_BE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_BE, i0, r0, r1);
+ return (w);
}
+/* Branch to i0 if r0 <= i1 (unsigned, condition "below or equal").  For
+ * i1 == 0 the compare is replaced by a test of r0 with itself, keeping
+ * the same condition.  Returns the address of the emitted jump. */
static jit_word_t
_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_BE, i0, r0, i1);
- else jci0(X86_CC_BE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_BE, i0, r0, i1);
+ else w = jci0(X86_CC_BE, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 == r1.  When both operands are the same register an
+ * unconditional jmpi() is emitted instead.  Returns the address reported
+ * by jmpi() or jcr(). */
static jit_word_t
_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_E, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_E, i0, r0, r1);
+ return (w);
}
+/* Branch to i0 if r0 == i1.  For i1 == 0 the compare is replaced by a
+ * test of r0 with itself, keeping the same condition.
+ * Returns the address of the emitted jump. */
static jit_word_t
_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_E, i0, r0, i1);
- else jci0(X86_CC_E, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_E, i0, r0, i1);
+ else w = jci0(X86_CC_E, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 >= r1 (signed).  When both operands are the same
+ * register an unconditional jmpi() is emitted instead.  Returns the
+ * address reported by jmpi() or jcr(). */
static jit_word_t
_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_GE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_GE, i0, r0, r1);
+ return (w);
}
+/* Branch to i0 if r0 >= i1 (signed).  For i1 == 0 the compare is replaced
+ * by a test of r0 with itself and a jump on the "not sign" condition.
+ * Returns the address of the emitted jump. */
static jit_word_t
_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_GE, i0, r0, i1);
- else jci0(X86_CC_NS, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_GE, i0, r0, i1);
+ else w = jci0(X86_CC_NS, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 >= r1 (unsigned, condition "above or equal").  When
+ * both operands are the same register an unconditional jmpi() is emitted
+ * instead.  Returns the address reported by jmpi() or jcr(). */
static jit_word_t
_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- if (r0 == r1) jmpi(i0);
- else jcr (X86_CC_AE, i0, r0, r1);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (r0 == r1) w = jmpi(i0);
+ else w = jcr (X86_CC_AE, i0, r0, r1);
+ return (w);
}
+/* Branch to i0 if r0 >= i1 (unsigned, condition "above or equal").  For
+ * i1 == 0 the condition always holds and an unconditional jmpi() is
+ * emitted instead.  Returns the address reported by jci() or jmpi(). */
static jit_word_t
_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_AE, i0, r0, i1);
- else jmpi(i0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_AE, i0, r0, i1);
+ else w = jmpi(i0);
+ return (w);
}
+/* Branch to i0 if r0 > r1 (signed).  Returns the address of the emitted
+ * jump as given by jcr(). */
static jit_word_t
_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_G, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_G, i0, r0, r1));
}
+/* Branch to i0 if r0 > i1 (signed); always uses a compare, with no
+ * special case for i1 == 0.  Returns the address of the emitted jump as
+ * given by jci(). */
static jit_word_t
_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- jci(X86_CC_G, i0, r0, i1);
- return (_jit->pc.w);
+ return (jci(X86_CC_G, i0, r0, i1));
}
+/* Branch to i0 if r0 > r1 (unsigned, condition "above").  Returns the
+ * address of the emitted jump as given by jcr(). */
static jit_word_t
_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_A, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_A, i0, r0, r1));
}
+/* Branch to i0 if r0 > i1 (unsigned, condition "above").  For i1 == 0
+ * this is the same as r0 != 0, so a test of r0 with itself and a jump on
+ * "not equal" is emitted.  Returns the address of the emitted jump. */
static jit_word_t
_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_A, i0, r0, i1);
- else jci0(X86_CC_NE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_A, i0, r0, i1);
+ else w = jci0(X86_CC_NE, i0, r0);
+ return (w);
}
+/* Branch to i0 if r0 != r1.  Returns the address of the emitted jump as
+ * given by jcr(). */
static jit_word_t
_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
- jcr(X86_CC_NE, i0, r0, r1);
- return (_jit->pc.w);
+ return (jcr(X86_CC_NE, i0, r0, r1));
}
+/* Branch to i0 if r0 != i1.  For i1 == 0 the compare is replaced by a
+ * test of r0 with itself, keeping the same condition.
+ * Returns the address of the emitted jump. */
static jit_word_t
_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
{
- if (i1) jci (X86_CC_NE, i0, r0, i1);
- else jci0(X86_CC_NE, i0, r0);
- return (_jit->pc.w);
+ jit_word_t w;
+ if (i1) w = jci (X86_CC_NE, i0, r0, i1);
+ else w = jci0(X86_CC_NE, i0, r0);
+ return (w);
}
+/* Branch to i0 if the test of r0 against r1 gives a non zero result
+ * ("mask set").  Returns the value of jnz(), no longer the pc after the
+ * jump. */
static jit_word_t
_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
testr(r0, r1);
- jnz(i0);
- return (_jit->pc.w);
+ return (jnz(i0));
}
static jit_word_t
testr(r0, rn(reg));
jit_unget_reg(reg);
}
- jnz(i0);
- return (_jit->pc.w);
+ return (jnz(i0));
}
+/* Branch to i0 if the test of r0 against r1 gives a zero result ("mask
+ * clear").  Returns the value of jz(), no longer the pc after the jump. */
static jit_word_t
_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
testr(r0, r1);
- jz(i0);
- return (_jit->pc.w);
+ return (jz(i0));
}
static jit_word_t
testr(r0, rn(reg));
jit_unget_reg(reg);
}
- jz(i0);
- return (_jit->pc.w);
+ return (jz(i0));
}
+/* Add r1 to r0 with iaddr() and branch to i0 on the overflow condition.
+ * Returns the value of jo(), no longer the pc after the jump. */
static jit_word_t
_boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
iaddr(r0, r1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
iaddi(r0, i1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jo(i0);
- return (_jit->pc.w);
+ return (jo(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jc(i0);
- return (_jit->pc.w);
+ return (jc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jno(i0);
- return (_jit->pc.w);
+ return (jno(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
_bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
isubr(r0, r1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
static jit_word_t
jit_int32_t reg;
if (can_sign_extend_int_p(i1)) {
isubi(r0, i1);
- jnc(i0);
- return (_jit->pc.w);
+ return (jnc(i0));
}
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
movi(rn(reg), i1);
+/* Emit a call to the address i0.
+ * The direct form is opcode 0xe8 followed by a 32 bit displacement taken
+ * from the end of that 5 byte instruction.  On __X64 it is only used when
+ * the displacement survives a round trip through jit_int32_t (and, on
+ * __X64_32, when the displacement and the end address have the same
+ * sign); otherwise calli_p() is used.
+ * Returns the address of the 0xe8 opcode, or the result of calli_p(). */
static jit_word_t
_calli(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
jit_word_t w;
+ jit_word_t d;
+ /* l: address right after the direct call; d: displacement from there */
+ jit_word_t l = _jit->pc.w + 5;
+ d = i0 - l;
#if __X64
- w = i0 - (_jit->pc.w + 5);
- if ((jit_int32_t)w == w) {
+ if (
+# if __X64_32
+ !((d < 0) ^ (l < 0)) &&
+# endif
+ (jit_int32_t)d == d) {
#endif
+ w = _jit->pc.w;
ic(0xe8);
- w = i0 - (_jit->pc.w + 4);
- ii(w);
- word = _jit->pc.w;
+ ii(d);
#if __X64
}
else
- word = calli_p(i0);
+ w = calli_p(i0);
#endif
- return (word);
+ return (w);
}
#if __X64
+/* Emit a call to i0 through a temporary register: the address is loaded
+ * with movi_p() and then called with callr().
+ * Returns the value of movi_p(), i.e. the location of the load of the
+ * address, not of the call itself. */
static jit_word_t
_calli_p(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
+ jit_word_t w;
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr);
- word = movi_p(rn(reg), i0);
+ w = movi_p(rn(reg), i0);
callr(rn(reg));
jit_unget_reg(reg);
- return (word);
+ return (w);
}
#endif
+/* Emit an unconditional jump to the address i0.
+ * The direct form is opcode 0xe9 followed by a 32 bit displacement taken
+ * from the end of that 5 byte instruction.  On __X64 it is only used when
+ * the displacement survives a round trip through jit_int32_t (and, on
+ * __X64_32, when the displacement and the end address have the same
+ * sign); otherwise jmpi_p() is used.
+ * Returns the address of the 0xe9 opcode, or the result of jmpi_p(). */
static jit_word_t
_jmpi(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
jit_word_t w;
+ jit_word_t d;
+ /* l: address right after the direct jump; d: displacement from there */
+ jit_word_t l = _jit->pc.w + 5;
+ d = i0 - l;
#if __X64
- w = i0 - (_jit->pc.w + 5);
- if ((jit_int32_t)w == w) {
+ if (
+# if __X64_32
+ !((d < 0) ^ (l < 0)) &&
+# endif
+ (jit_int32_t)d == d) {
#endif
+ w = _jit->pc.w;
ic(0xe9);
- w = i0 - (_jit->pc.w + 4);
- ii(w);
- word = _jit->pc.w;
+ ii(d);
#if __X64
}
else
- word = jmpi_p(i0);
+ w = jmpi_p(i0);
#endif
- return (word);
+ return (w);
}
#if __X64
+/* Emit a jump to i0 through a temporary register: the address is loaded
+ * with movi_p() and then jumped to with jmpr().
+ * Returns the value of movi_p(), i.e. the location of the load of the
+ * address, not of the jump itself. */
static jit_word_t
_jmpi_p(jit_state_t *_jit, jit_word_t i0)
{
- jit_word_t word;
+ jit_word_t w;
jit_int32_t reg;
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
- word = movi_p(rn(reg), i0);
+ w = movi_p(rn(reg), i0);
jmpr(rn(reg));
jit_unget_reg(reg);
- return (word);
+ return (w);
}
#endif
+/* Emit a short unconditional jump: opcode 0xeb followed by the byte i0.
+ * Note that i0 is written as is, it is the displacement byte and not a
+ * target address.  Returns the address of the opcode byte, to be given to
+ * patch_at() once the real target is known. */
-static void
+static jit_word_t
_jmpsi(jit_state_t *_jit, jit_uint8_t i0)
{
+ jit_word_t w = _jit->pc.w;
ic(0xeb);
ic(i0);
+ return (w);
}
static void
_prolog(jit_state_t *_jit, jit_node_t *node)
{
- jit_int32_t reg;
+ jit_int32_t reg, offs;
if (_jitc->function->define_frame || _jitc->function->assume_frame) {
jit_int32_t frame = -_jitc->function->frame;
+ jit_check_frame();
assert(_jitc->function->self.aoff >= frame);
if (_jitc->function->assume_frame)
return;
(_jitc->function->self.alen > 32 ?
_jitc->function->self.alen : 32) -
/* align stack at 16 bytes */
- _jitc->function->self.aoff) + 15) & -16) +
- stack_adjust;
+ _jitc->function->self.aoff) + 15) & -16);
#else
_jitc->function->stack = (((_jitc->function->self.alen -
- _jitc->function->self.aoff) + 15) & -16) +
- stack_adjust;
+ _jitc->function->self.aoff) + 15) & -16);
#endif
- subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+ if (_jitc->function->stack)
+ _jitc->function->need_stack = 1;
+
+ if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+ /* check if any callee save register needs to be saved */
+ for (reg = 0; reg < _jitc->reglen; ++reg)
+ if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+ (_rvs[reg].spec & jit_class_sav)) {
+ _jitc->function->need_stack = 1;
+ break;
+ }
+ }
+
+ if (_jitc->function->need_frame || _jitc->function->need_stack)
+ subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
/* callee save registers */
-#if __X32
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- stxi(12, _RSP_REGNO, _RDI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- stxi( 8, _RSP_REGNO, _RSI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- stxi( 4, _RSP_REGNO, _RBX_REGNO);
-#else
-# if __CYGWIN__ || _WIN32
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
- sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
- sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
- sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
- sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
- sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
- sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
- sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
- sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
- sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
- sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- stxi(56, _RSP_REGNO, _R15_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- stxi(48, _RSP_REGNO, _R14_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- stxi(40, _RSP_REGNO, _R13_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- stxi(32, _RSP_REGNO, _R12_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- stxi(24, _RSP_REGNO, _RSI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- stxi(16, _RSP_REGNO, _RDI_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- stxi( 8, _RSP_REGNO, _RBX_REGNO);
-# else
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- stxi(40, _RSP_REGNO, _RBX_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- stxi(32, _RSP_REGNO, _R12_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- stxi(24, _RSP_REGNO, _R13_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- stxi(16, _RSP_REGNO, _R14_REGNO);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- stxi( 8, _RSP_REGNO, _R15_REGNO);
-# endif
+ for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ stxi(offs, _RSP_REGNO, rn(iregs[reg]));
+ offs += REAL_WORDSIZE;
+ }
+ }
+#if __X64 && (__CYGWIN__ || _WIN32)
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
+ offs += sizeof(jit_float64_t);
+ }
+ }
#endif
- stxi(0, _RSP_REGNO, _RBP_REGNO);
- movr(_RBP_REGNO, _RSP_REGNO);
+
+ if (_jitc->function->need_frame) {
+ stxi(0, _RSP_REGNO, _RBP_REGNO);
+ movr(_RBP_REGNO, _RSP_REGNO);
+ }
/* alloca */
- subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
+ if (_jitc->function->stack)
+ subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
if (_jitc->function->allocar) {
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), _jitc->function->self.aoff);
/* test %al, %al */
ic(0x84);
ic(0xc0);
- jes(0);
- nofp_code = _jit->pc.w;
+ nofp_code = jes(0);
/* Save fp registers in the save area, if any is a vararg */
/* Note that the full 16 byte xmm is not saved, because
sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
- patch_rel_char(nofp_code, _jit->pc.w);
+ patch_at(nofp_code, _jit->pc.w);
}
}
#endif
+/* Emit the function epilogue.
+ * Nothing is emitted when the function assumes an existing frame.
+ * Otherwise: the stack pointer is reset from the frame pointer when a
+ * frame was created, every callee save register marked in the function
+ * regset is reloaded from consecutive slots above the stack pointer, the
+ * frame pointer is restored and/or the frame area released, and a final
+ * 0xc3 byte (ret) is emitted. */
static void
_epilog(jit_state_t *_jit, jit_node_t *node)
{
+ jit_int32_t reg, offs;
if (_jitc->function->assume_frame)
return;
+ if (_jitc->function->need_frame)
+ movr(_RSP_REGNO, _RBP_REGNO);
+
/* callee save registers */
- movr(_RSP_REGNO, _RBP_REGNO);
-#if __X32
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- ldxi(_RDI_REGNO, _RSP_REGNO, 12);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- ldxi(_RSI_REGNO, _RSP_REGNO, 8);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- ldxi(_RBX_REGNO, _RSP_REGNO, 4);
-#else
-# if __CYGWIN__ || _WIN32
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
- sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
- sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
- sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
- sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
- sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
- sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
- sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
- sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
- sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72);
- if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
- sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- ldxi(_R15_REGNO, _RSP_REGNO, 56);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- ldxi(_R14_REGNO, _RSP_REGNO, 48);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- ldxi(_R13_REGNO, _RSP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- ldxi(_R12_REGNO, _RSP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
- ldxi(_RSI_REGNO, _RSP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
- ldxi(_RDI_REGNO, _RSP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- ldxi(_RBX_REGNO, _RSP_REGNO, 8);
-# else
- if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
- ldxi(_RBX_REGNO, _RSP_REGNO, 40);
- if (jit_regset_tstbit(&_jitc->function->regset, _R12))
- ldxi(_R12_REGNO, _RSP_REGNO, 32);
- if (jit_regset_tstbit(&_jitc->function->regset, _R13))
- ldxi(_R13_REGNO, _RSP_REGNO, 24);
- if (jit_regset_tstbit(&_jitc->function->regset, _R14))
- ldxi(_R14_REGNO, _RSP_REGNO, 16);
- if (jit_regset_tstbit(&_jitc->function->regset, _R15))
- ldxi(_R15_REGNO, _RSP_REGNO, 8);
-# endif
+ /* slots are packed: only registers present in the regset take one,
+ * starting at REAL_WORDSIZE (offset 0 is used for the frame pointer
+ * below); the order must match the stores done by the prolog */
+ for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+ ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
+ offs += REAL_WORDSIZE;
+ }
+ }
+#if __X64 && (__CYGWIN__ || _WIN32)
+ /* float registers follow the integer ones, 8 bytes each */
+ for (reg = 0; reg < jit_size(fregs); reg++) {
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+ sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
+ offs += sizeof(jit_float64_t);
+ }
+ }
#endif
- ldxi(_RBP_REGNO, _RSP_REGNO, 0);
- addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+ if (_jitc->function->need_frame) {
+ ldxi(_RBP_REGNO, _RSP_REGNO, 0);
+ addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
+ }
+ /* This condition does not happen as much as expected because
+ * it is not safe to not create a frame pointer if any function
+ * is called, even jit functions, as those might call external
+ * functions. */
+ else if (_jitc->function->need_stack)
+ addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
+ /* ret */
ic(0xc3);
}
{
#if __X32 || __CYGWIN__ || _WIN32
assert(_jitc->function->self.call & jit_call_varargs);
- addi(r0, _RBP_REGNO, _jitc->function->self.size);
+ addi(r0, _RBP_REGNO, jit_selfsize());
#else
jit_int32_t reg;
stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
/* Initialize overflow pointer to the first stack argument. */
- addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
+ addi(rn(reg), _RBP_REGNO, jit_selfsize());
stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
/* Initialize register save area pointer. */
/* Jump over if there are no remaining arguments in the save area. */
icmpi(rn(rg0), va_gp_max_offset);
- jaes(0);
- ge_code = _jit->pc.w;
+ ge_code = jaes(0);
/* Load the save area pointer in the second temporary. */
ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
jit_unget_reg(rg1);
/* Jump over overflow code. */
- jmpsi(0);
- lt_code = _jit->pc.w;
+ lt_code = jmpsi(0);
/* Where to land if argument is in overflow area. */
- patch_rel_char(ge_code, _jit->pc.w);
+ patch_at(ge_code, _jit->pc.w);
/* Load overflow pointer. */
ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
/* Where to land if argument is in save area. */
- patch_rel_char(lt_code, _jit->pc.w);
+ patch_at(lt_code, _jit->pc.w);
jit_unget_reg(rg0);
#endif
/* Jump over if there are no remaining arguments in the save area. */
icmpi(rn(rg0), va_fp_max_offset);
- jaes(0);
- ge_code = _jit->pc.w;
+ ge_code = jaes(0);
/* Load the save area pointer in the second temporary. */
ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
jit_unget_reg(rg1);
/* Jump over overflow code. */
- jmpsi(0);
- lt_code = _jit->pc.w;
+ lt_code = jmpsi(0);
/* Where to land if argument is in overflow area. */
- patch_rel_char(ge_code, _jit->pc.w);
+ patch_at(ge_code, _jit->pc.w);
/* Load overflow pointer. */
ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
/* Where to land if argument is in save area. */
- patch_rel_char(lt_code, _jit->pc.w);
+ patch_at(lt_code, _jit->pc.w);
jit_unget_reg(rg0);
#endif
}
+/* Patch the instruction at address instr so that it refers to label.
+ * The kind of instruction is no longer taken from a node but from the
+ * opcode byte found at instr, so instr must be the opcode address returned
+ * by the emitter (after any rex prefix):
+ *   0xb8..0xbf       movi_p: the word sized immediate is overwritten
+ *   0x8d             rip relative lea from movi (CAN_RIP_ADDRESS only)
+ *   0x0f 0x80..0x8f  jcc, 0xe8 calli, 0xe9 jmpi: 32 bit displacement
+ *   0x70..0x7f       jccs, 0xeb jmpsi: 8 bit displacement
+ * Displacements are taken from the end of the displacement field and
+ * checked with assert() to fit; any other opcode calls abort().
+ * The "case a ... b" ranges are a compiler extension. */
static void
-_patch_at(jit_state_t *_jit, jit_node_t *node,
- jit_word_t instr, jit_word_t label)
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
{
- switch (node->code) {
-# if __X64
- case jit_code_calli:
- case jit_code_jmpi:
-# endif
- case jit_code_movi:
- patch_abs(instr, label);
+ jit_word_t disp;
+ jit_uint8_t *code = (jit_uint8_t *)instr;
+ /* from here on instr is the address of the byte after the opcode */
+ ++instr;
+ switch (code[0]) {
+ /* movi_p */
+ case 0xb8 ... 0xbf:
+ *(jit_word_t *)instr = label;
break;
- default:
- patch_rel(instr, label);
+ /* forward pc relative address known to be in range */
+#if CAN_RIP_ADDRESS
+ /* movi */
+ case 0x8d:
+ /* skip the modrm byte */
+ ++instr;
+ goto apply;
+#endif
+ /* jcc */
+ case 0x0f:
+ /* skip the second opcode byte, after checking it is a jcc */
+ ++instr;
+ if (code[1] < 0x80 || code[1] > 0x8f)
+ goto fail;
+ /* fall through to the common 32 bit displacement code */
+ /* calli */
+ case 0xe8:
+ /* jmpi */
+ case 0xe9:
+#if CAN_RIP_ADDRESS
+ apply:
+#endif
+ disp = label - (instr + 4);
+ assert((jit_int32_t)disp == disp);
+ *(jit_int32_t *)instr = disp;
+ break;
+ /* jccs */
+ case 0x70 ... 0x7f:
+ /* jmpsi */
+ case 0xeb:
+ disp = label - (instr + 1);
+ assert((jit_int8_t)disp == disp);
+ *(jit_int8_t *)instr = disp;
break;
+ default:
+ fail:
+ abort();
}
}
#endif
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
*/
#if PROTO
-# if __X32
-# define sse_address_p(i0) 1
-# else
-# if __X64_32
-# define sse_address_p(i0) ((jit_word_t)(i0) >= 0)
-# else
-# define sse_address_p(i0) can_sign_extend_int_p(i0)
-# endif
-# endif
# define _XMM6_REGNO 6
# define _XMM7_REGNO 7
# define _XMM8_REGNO 8
jit_word_t i0, jit_int32_t r0, \
jit_float##size##_t *i1) \
{ \
- jit_word_t word; \
+ jit_word_t w; \
jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \
jit_class_nospill); \
assert(jit_sse_reg_p(reg)); \
sse_movi_##type(rn(reg), i1); \
- word = sse_b##name##r_##type(i0, r0, rn(reg)); \
+ w = sse_b##name##r_##type(i0, r0, rn(reg)); \
jit_unget_reg(reg); \
- return (word); \
+ return (w); \
}
# define fopi(name) fpr_opi(name, f, 32)
# define fbopi(name) fpr_bopi(name, f, 32)
ldi = !_jitc->no_data;
#if __X64
/* if will allocate a register for offset, just use immediate */
- if (ldi && !sse_address_p(i0))
+# if CAN_RIP_ADDRESS
+ if (ldi) {
+ jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ ldi = can_sign_extend_int_p(rel);
+ if (!ldi && address_p(i0))
+ ldi = 1;
+ }
+# else
+ if (ldi && !address_p(i0))
ldi = 0;
+# endif
#endif
if (ldi)
sse_ldi_f(r0, (jit_word_t)i0);
}
ixorr(reg, reg);
ucomissr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_E, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
}
imovi(reg, 1);
ucomissr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_NE, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
_sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
+ else
+#endif
+ if (address_p(i0))
movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
else {
reg = jit_get_reg(jit_class_gpr);
_sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
+ else
+#endif
+ if (address_p(i0))
movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
else {
reg = jit_get_reg(jit_class_gpr);
_sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r1, r0);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
fbopi(lt)
_sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r1, r0);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
fbopi(le)
static jit_word_t
_sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
jit_word_t jp_code;
ucomissr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- je(i0);
- patch_rel_char(jp_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0);
+ w = je(i0);
+ patch_at(jp_code, _jit->pc.w);
+ return (w);
}
fbopi(eq)
_sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
fbopi(ge)
_sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
fbopi(gt)
static jit_word_t
_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
jit_word_t jp_code;
jit_word_t jz_code;
ucomissr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- jzs(0);
- jz_code = _jit->pc.w;
- patch_rel_char(jp_code, _jit->pc.w);
- jmpi(i0);
- patch_rel_char(jz_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0);
+ jz_code = jzs(0);
+ patch_at(jp_code, _jit->pc.w);
+ w = jmpi(i0);
+ patch_at(jz_code, _jit->pc.w);
+ return (w);
}
fbopi(ne)
_sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
fbopi(unlt)
static jit_word_t
_sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0);
else {
ucomissr(r0, r1);
- jna(i0);
+ w = jna(i0);
}
- return (_jit->pc.w);
+ return (w);
}
fbopi(unle)
static jit_word_t
_sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0);
else {
ucomissr(r0, r1);
- je(i0);
+ w = je(i0);
}
- return (_jit->pc.w);
+ return (w);
}
fbopi(uneq)
static jit_word_t
_sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0);
else {
ucomissr(r1, r0);
- jna(i0);
+ w = jna(i0);
}
- return (_jit->pc.w);
+ return (w);
}
fbopi(unge)
_sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r1, r0);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
fbopi(ungt)
_sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jne(i0);
- return (_jit->pc.w);
+ return (jne(i0));
}
fbopi(ltgt)
_sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jnp(i0);
- return (_jit->pc.w);
+ return (jnp(i0));
}
fbopi(ord)
_sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomissr(r0, r1);
- jp(i0);
- return (_jit->pc.w);
+ return (jp(i0));
}
fbopi(unord)
}
ixorr(reg, reg);
ucomisdr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_E, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
}
imovi(reg, 1);
ucomisdr(r2, r1);
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_NE, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
ldi = !_jitc->no_data;
#if __X64
/* if will allocate a register for offset, just use immediate */
- if (ldi && !sse_address_p(i0))
+# if CAN_RIP_ADDRESS
+ if (ldi) {
+ jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ ldi = can_sign_extend_int_p(rel);
+ if (!ldi && address_p(i0))
+ ldi = 1;
+ }
+# else
+ if (ldi && !address_p(i0))
ldi = 0;
+# endif
#endif
if (ldi)
sse_ldi_d(r0, (jit_word_t)i0);
movdqxr(r0, rn(reg));
jit_unget_reg(reg);
#else
+ CHECK_CVT_OFFSET();
movi(rn(reg), data.ii[0]);
stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
movi(rn(reg), data.ii[1]);
_sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
+ else
+#endif
+ if (address_p(i0))
movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
else {
reg = jit_get_reg(jit_class_gpr);
_sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
{
jit_int32_t reg;
- if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+ jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+ if (can_sign_extend_int_p(rel))
+ movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
+ else
+#endif
+ if (address_p(i0))
movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
else {
reg = jit_get_reg(jit_class_gpr);
_sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r1, r0);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
dbopi(lt)
_sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r1, r0);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
dbopi(le)
static jit_word_t
_sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
jit_word_t jp_code;
ucomisdr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- je(i0);
- patch_rel_char(jp_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0);
+ w = je(i0);
+ patch_at(jp_code, _jit->pc.w);
+ return (w);
}
dbopi(eq)
_sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jae(i0);
- return (_jit->pc.w);
+ return (jae(i0));
}
dbopi(ge)
_sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- ja(i0);
- return (_jit->pc.w);
+ return (ja(i0));
}
dbopi(gt)
static jit_word_t
_sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
jit_word_t jp_code;
jit_word_t jz_code;
ucomisdr(r0, r1);
- jps(0);
- jp_code = _jit->pc.w;
- jzs(0);
- jz_code = _jit->pc.w;
- patch_rel_char(jp_code, _jit->pc.w);
- jmpi(i0);
- patch_rel_char(jz_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jps(0);
+ jz_code = jzs(0);
+ patch_at(jp_code, _jit->pc.w);
+ w = jmpi(i0);
+ patch_at(jz_code, _jit->pc.w);
+ return (w);
}
dbopi(ne)
_sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
dbopi(unlt)
static jit_word_t
_sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0);
else {
ucomisdr(r0, r1);
- jna(i0);
+ w = jna(i0);
}
- return (_jit->pc.w);
+ return (w);
}
dbopi(unle)
static jit_word_t
_sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0);
else {
ucomisdr(r0, r1);
- je(i0);
+ w = je(i0);
}
- return (_jit->pc.w);
+ return (w);
}
dbopi(uneq)
static jit_word_t
_sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
if (r0 == r1)
- jmpi(i0);
+ w = jmpi(i0);
else {
ucomisdr(r1, r0);
- jna(i0);
+ w = jna(i0);
}
- return (_jit->pc.w);
+ return (w);
}
dbopi(unge)
_sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r1, r0);
- jnae(i0);
- return (_jit->pc.w);
+ return (jnae(i0));
}
dbopi(ungt)
_sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jne(i0);
- return (_jit->pc.w);
+ return (jne(i0));
}
dbopi(ltgt)
_sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jnp(i0);
- return (_jit->pc.w);
+ return (jnp(i0));
}
dbopi(ord)
_sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
ucomisdr(r0, r1);
- jp(i0);
- return (_jit->pc.w);
+ return (jp(i0));
}
dbopi(unord)
# undef fopi
#define JIT_INSTR_MAX 42
0, /* data */
0, /* live */
- 3, /* align */
+ 11, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
3, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
3, /* va_start */
5, /* va_arg */
7, /* va_arg_d */
5, /* addxi */
4, /* subr */
6, /* subi */
- 6, /* subcr */
+ 12, /* subcr */
6, /* subci */
- 6, /* subxr */
+ 12, /* subxr */
5, /* subxi */
8, /* rsbi */
5, /* mulr */
22, /* divr_u */
25, /* divi_u */
23, /* qdivr */
- 26, /* qdivi */
+ 28, /* qdivi */
24, /* qdivr_u */
- 27, /* qdivi_u */
+ 29, /* qdivi_u */
21, /* remr */
24, /* remi */
22, /* remr_u */
5, /* movi */
5, /* movnr */
5, /* movzr */
+ 9, /* casr */
+ 13, /* casi */
11, /* extr_c */
11, /* extr_uc */
3, /* extr_s */
3, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 7, /* bswapr_us */
+ 4, /* bswapr_ui */
+ 0, /* bswapr_ul */
7, /* htonr_us */
4, /* htonr_ui */
0, /* htonr_ul */
2, /* callr */
5, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
4, /* extr_d */
4, /* extr_f_d */
10, /* movr_d */
- 24, /* movi_d */
+ 33, /* movi_d */
4, /* ldr_d */
8, /* ldi_d */
5, /* ldxr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 7, /* bswapr_us */
- 4, /* bswapr_ui */
- 0, /* bswapr_ul */
- 9, /* casr */
- 13, /* casi */
-#endif
+ 21, /* clo */
+ 17, /* clz */
+ 15, /* cto */
+ 11, /* ctz */
+#endif /* __X32 */
#if __X64
#if __CYGWIN__ || _WIN32
#define JIT_INSTR_MAX 130
0, /* data */
0, /* live */
- 6, /* align */
+ 27, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
7, /* label */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
7, /* va_start */
7, /* va_arg */
9, /* va_arg_d */
10, /* movi */
7, /* movnr */
7, /* movzr */
+ 11, /* casr */
+ 21, /* casi */
7, /* extr_c */
7, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
3, /* extr_i */
3, /* extr_ui */
+ 9, /* bswapr_us */
+ 6, /* bswapr_ui */
+ 6, /* bswapr_ul */
9, /* htonr_us */
6, /* htonr_ui */
6, /* htonr_ul */
4, /* ldr_c */
- 15, /* ldi_c */
+ 14, /* ldi_c */
4, /* ldr_uc */
- 15, /* ldi_uc */
+ 14, /* ldi_uc */
4, /* ldr_s */
- 15, /* ldi_s */
+ 14, /* ldi_s */
4, /* ldr_us */
- 15, /* ldi_us */
+ 14, /* ldi_us */
3, /* ldr_i */
- 14, /* ldi_i */
+ 13, /* ldi_i */
3, /* ldr_ui */
- 14, /* ldi_ui */
+ 13, /* ldi_ui */
3, /* ldr_l */
- 14, /* ldi_l */
+ 13, /* ldi_l */
5, /* ldxr_c */
8, /* ldxi_c */
5, /* ldxr_uc */
4, /* ldxr_l */
7, /* ldxi_l */
6, /* str_c */
- 17, /* sti_c */
+ 16, /* sti_c */
4, /* str_s */
- 15, /* sti_s */
+ 14, /* sti_s */
3, /* str_i */
- 14, /* sti_i */
+ 13, /* sti_i */
3, /* str_l */
- 14, /* sti_l */
+ 13, /* sti_l */
7, /* stxr_c */
7, /* stxi_c */
5, /* stxr_s */
10, /* bxsubi */
9, /* bxsubr_u */
10, /* bxsubi_u */
- 3, /* jmpr */
+ 2, /* jmpr */
5, /* jmpi */
- 3, /* callr */
- 13, /* calli */
+ 2, /* callr */
+ 20, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* putargr_f */
0, /* putargi_f */
10, /* addr_f */
- 21, /* addi_f */
+ 19, /* addi_f */
15, /* subr_f */
- 21, /* subi_f */
- 27, /* rsbi_f */
+ 19, /* subi_f */
+ 26, /* rsbi_f */
10, /* mulr_f */
- 21, /* muli_f */
+ 19, /* muli_f */
15, /* divr_f */
- 21, /* divi_f */
- 15, /* negr_f */
+ 19, /* divi_f */
+ 14, /* negr_f */
15, /* absr_f */
5, /* sqrtr_f */
16, /* ltr_f */
- 31, /* lti_f */
+ 30, /* lti_f */
16, /* ler_f */
- 31, /* lei_f */
+ 30, /* lei_f */
18, /* eqr_f */
- 33, /* eqi_f */
+ 32, /* eqi_f */
16, /* ger_f */
- 31, /* gei_f */
+ 30, /* gei_f */
16, /* gtr_f */
- 31, /* gti_f */
+ 30, /* gti_f */
20, /* ner_f */
- 35, /* nei_f */
+ 34, /* nei_f */
16, /* unltr_f */
- 31, /* unlti_f */
+ 30, /* unlti_f */
16, /* unler_f */
- 31, /* unlei_f */
+ 30, /* unlei_f */
16, /* uneqr_f */
- 31, /* uneqi_f */
+ 30, /* uneqi_f */
16, /* unger_f */
- 31, /* ungei_f */
+ 30, /* ungei_f */
16, /* ungtr_f */
- 31, /* ungti_f */
+ 30, /* ungti_f */
16, /* ltgtr_f */
- 31, /* ltgti_f */
+ 30, /* ltgti_f */
16, /* ordr_f */
- 31, /* ordi_f */
+ 30, /* ordi_f */
16, /* unordr_f */
- 31, /* unordi_f */
+ 30, /* unordi_f */
5, /* truncr_f_i */
5, /* truncr_f_l */
5, /* extr_f */
5, /* extr_d_f */
5, /* movr_f */
- 15, /* movi_f */
+ 18, /* movi_f */
5, /* ldr_f */
- 16, /* ldi_f */
+ 15, /* ldi_f */
6, /* ldxr_f */
8, /* ldxi_f */
5, /* str_f */
- 16, /* sti_f */
+ 15, /* sti_f */
6, /* stxr_f */
9, /* stxi_f */
10, /* bltr_f */
- 21, /* blti_f */
+ 19, /* blti_f */
10, /* bler_f */
- 24, /* blei_f */
+ 23, /* blei_f */
12, /* beqr_f */
27, /* beqi_f */
10, /* bger_f */
- 25, /* bgei_f */
+ 24, /* bgei_f */
10, /* bgtr_f */
- 25, /* bgti_f */
+ 24, /* bgti_f */
13, /* bner_f */
- 28, /* bnei_f */
+ 27, /* bnei_f */
10, /* bunltr_f */
- 25, /* bunlti_f */
+ 24, /* bunlti_f */
10, /* bunler_f */
- 25, /* bunlei_f */
+ 24, /* bunlei_f */
10, /* buneqr_f */
- 25, /* buneqi_f */
+ 24, /* buneqi_f */
10, /* bunger_f */
- 25, /* bungei_f */
+ 24, /* bungei_f */
10, /* bungtr_f */
- 25, /* bungti_f */
+ 24, /* bungti_f */
10, /* bltgtr_f */
- 25, /* bltgti_f */
+ 24, /* bltgti_f */
10, /* bordr_f */
- 25, /* bordi_f */
+ 24, /* bordi_f */
10, /* bunordr_f */
- 25, /* bunordi_f */
+ 24, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
25, /* muli_d */
15, /* divr_d */
25, /* divi_d */
- 22, /* negr_d */
+ 21, /* negr_d */
16, /* absr_d */
5, /* sqrtr_d */
17, /* ltr_d */
5, /* extr_d */
5, /* extr_f_d */
5, /* movr_d */
- 15, /* movi_d */
+ 29, /* movi_d */
5, /* ldr_d */
- 16, /* ldi_d */
+ 15, /* ldi_d */
6, /* ldxr_d */
8, /* ldxi_d */
5, /* str_d */
- 16, /* sti_d */
+ 15, /* sti_d */
6, /* stxr_d */
9, /* stxi_d */
11, /* bltr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 9, /* bswapr_us */
- 6, /* bswapr_ui */
- 6, /* bswapr_ul */
- 0, /* casr */
- 0, /* casi */
+ 27, /* clo */
+ 21, /* clz */
+ 20, /* cto */
+ 14, /* ctz */
#else
# if __X64_32
-#define JIT_INSTR_MAX 108
+#define JIT_INSTR_MAX 105
0, /* data */
0, /* live */
- 3, /* align */
+ 7, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
3, /* label */
- 108, /* prolog */
+ 105, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
- 41, /* va_start */
- 45, /* va_arg */
- 54, /* va_arg_d */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
+ 33, /* va_start */
+ 43, /* va_arg */
+ 45, /* va_arg_d */
0, /* va_end */
5, /* addr */
7, /* addi */
6, /* movi */
7, /* movnr */
7, /* movzr */
+ 11, /* casr */
+ 16, /* casi */
7, /* extr_c */
7, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
0, /* extr_i */
0, /* extr_ui */
+ 9, /* bswapr_us */
+ 6, /* bswapr_ui */
+ 0, /* bswapr_ul */
9, /* htonr_us */
6, /* htonr_ui */
0, /* htonr_ul */
8, /* sti_i */
0, /* str_l */
0, /* sti_l */
- 12, /* stxr_c */
+ 11, /* stxr_c */
7, /* stxi_c */
- 10, /* stxr_s */
+ 9, /* stxr_s */
7, /* stxi_s */
- 9, /* stxr_i */
+ 8, /* stxr_i */
6, /* stxi_i */
0, /* stxr_l */
0, /* stxi_l */
10, /* bxsubi_u */
2, /* jmpr */
5, /* jmpi */
- 3, /* callr */
+ 2, /* callr */
9, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
0, /* putargr_f */
0, /* putargi_f */
10, /* addr_f */
- 21, /* addi_f */
+ 20, /* addi_f */
15, /* subr_f */
- 21, /* subi_f */
- 26, /* rsbi_f */
+ 20, /* subi_f */
+ 25, /* rsbi_f */
10, /* mulr_f */
- 21, /* muli_f */
+ 20, /* muli_f */
15, /* divr_f */
- 21, /* divi_f */
+ 20, /* divi_f */
15, /* negr_f */
15, /* absr_f */
5, /* sqrtr_f */
11, /* movi_f */
6, /* ldr_f */
10, /* ldi_f */
- 11, /* ldxr_f */
+ 10, /* ldxr_f */
9, /* ldxi_f */
6, /* str_f */
10, /* sti_f */
- 11, /* stxr_f */
+ 10, /* stxr_f */
9, /* stxi_f */
10, /* bltr_f */
- 21, /* blti_f */
+ 20, /* blti_f */
10, /* bler_f */
- 21, /* blei_f */
+ 20, /* blei_f */
12, /* beqr_f */
23, /* beqi_f */
10, /* bger_f */
- 21, /* bgei_f */
+ 20, /* bgei_f */
10, /* bgtr_f */
- 21, /* bgti_f */
+ 20, /* bgti_f */
13, /* bner_f */
- 24, /* bnei_f */
+ 23, /* bnei_f */
10, /* bunltr_f */
- 21, /* bunlti_f */
+ 20, /* bunlti_f */
10, /* bunler_f */
- 21, /* bunlei_f */
+ 20, /* bunlei_f */
10, /* buneqr_f */
- 21, /* buneqi_f */
+ 20, /* buneqi_f */
10, /* bunger_f */
- 21, /* bungei_f */
+ 20, /* bungei_f */
10, /* bungtr_f */
- 21, /* bungti_f */
+ 20, /* bungti_f */
10, /* bltgtr_f */
- 21, /* bltgti_f */
+ 20, /* bltgti_f */
10, /* bordr_f */
- 21, /* bordi_f */
+ 20, /* bordi_f */
10, /* bunordr_f */
- 21, /* bunordi_f */
+ 20, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
0, /* putargr_d */
0, /* putargi_d */
10, /* addr_d */
- 33, /* addi_d */
+ 29, /* addi_d */
15, /* subr_d */
- 33, /* subi_d */
- 38, /* rsbi_d */
+ 29, /* subi_d */
+ 34, /* rsbi_d */
10, /* mulr_d */
- 33, /* muli_d */
+ 29, /* muli_d */
15, /* divr_d */
- 33, /* divi_d */
+ 29, /* divi_d */
22, /* negr_d */
16, /* absr_d */
5, /* sqrtr_d */
23, /* movi_d */
6, /* ldr_d */
10, /* ldi_d */
- 11, /* ldxr_d */
+ 10, /* ldxr_d */
9, /* ldxi_d */
6, /* str_d */
10, /* sti_d */
- 11, /* stxr_d */
+ 10, /* stxr_d */
9, /* stxi_d */
11, /* bltr_d */
- 34, /* blti_d */
+ 30, /* blti_d */
11, /* bler_d */
- 34, /* blei_d */
+ 30, /* blei_d */
13, /* beqr_d */
36, /* beqi_d */
11, /* bger_d */
- 34, /* bgei_d */
+ 30, /* bgei_d */
11, /* bgtr_d */
- 34, /* bgti_d */
+ 30, /* bgti_d */
14, /* bner_d */
- 37, /* bnei_d */
+ 33, /* bnei_d */
11, /* bunltr_d */
- 34, /* bunlti_d */
+ 30, /* bunlti_d */
11, /* bunler_d */
- 34, /* bunlei_d */
+ 30, /* bunlei_d */
11, /* buneqr_d */
- 34, /* buneqi_d */
+ 30, /* buneqi_d */
11, /* bunger_d */
- 34, /* bungei_d */
+ 30, /* bungei_d */
11, /* bungtr_d */
- 34, /* bungti_d */
+ 30, /* bungti_d */
11, /* bltgtr_d */
- 34, /* bltgti_d */
+ 30, /* bltgti_d */
11, /* bordr_d */
- 34, /* bordi_d */
+ 30, /* bordi_d */
11, /* bunordr_d */
- 34, /* bunordi_d */
+ 30, /* bunordi_d */
0, /* pushargr_d */
0, /* pushargi_d */
0, /* retr_d */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 9, /* bswapr_us */
- 6, /* bswapr_ui */
- 0, /* bswapr_ul */
- 0, /* casr */
- 0, /* casi */
+ 11, /* clo */
+ 5, /* clz */
+ 11, /* cto */
+ 5, /* ctz */
+#else
-# else
-#define JIT_INSTR_MAX 115
+#define JIT_INSTR_MAX 112
0, /* data */
0, /* live */
- 6, /* align */
+ 27, /* align */
0, /* save */
0, /* load */
+ 4, /* skip */
0, /* #name */
0, /* #note */
7, /* label */
- 115, /* prolog */
+ 112, /* prolog */
0, /* ellipsis */
0, /* va_push */
0, /* allocai */
0, /* allocar */
- 0, /* arg */
+ 0, /* arg_c */
+ 0, /* arg_s */
+ 0, /* arg_i */
+ 0, /* arg_l */
0, /* getarg_c */
0, /* getarg_uc */
0, /* getarg_s */
0, /* getarg_i */
0, /* getarg_ui */
0, /* getarg_l */
- 0, /* putargr */
- 0, /* putargi */
+ 0, /* putargr_c */
+ 0, /* putargi_c */
+ 0, /* putargr_uc */
+ 0, /* putargi_uc */
+ 0, /* putargr_s */
+ 0, /* putargi_s */
+ 0, /* putargr_us */
+ 0, /* putargi_us */
+ 0, /* putargr_i */
+ 0, /* putargi_i */
+ 0, /* putargr_ui */
+ 0, /* putargi_ui */
+ 0, /* putargr_l */
+ 0, /* putargi_l */
38, /* va_start */
41, /* va_arg */
48, /* va_arg_d */
10, /* movi */
7, /* movnr */
7, /* movzr */
+ 11, /* casr */
+ 16, /* casi */
4, /* extr_c */
4, /* extr_uc */
4, /* extr_s */
4, /* extr_us */
3, /* extr_i */
3, /* extr_ui */
+ 9, /* bswapr_us */
+ 6, /* bswapr_ui */
+ 6, /* bswapr_ul */
9, /* htonr_us */
6, /* htonr_ui */
6, /* htonr_ul */
9, /* bxsubr_u */
10, /* bxsubi_u */
2, /* jmpr */
- 13, /* jmpi */
- 3, /* callr */
- 12, /* calli */
+ 5, /* jmpi */
+ 2, /* callr */
+ 13, /* calli */
0, /* prepare */
- 0, /* pushargr */
- 0, /* pushargi */
+ 0, /* pushargr_c */
+ 0, /* pushargi_c */
+ 0, /* pushargr_uc */
+ 0, /* pushargi_uc */
+ 0, /* pushargr_s */
+ 0, /* pushargi_s */
+ 0, /* pushargr_us */
+ 0, /* pushargi_us */
+ 0, /* pushargr_i */
+ 0, /* pushargi_i */
+ 0, /* pushargr_ui */
+ 0, /* pushargi_ui */
+ 0, /* pushargr_l */
+ 0, /* pushargi_l */
0, /* finishr */
0, /* finishi */
0, /* ret */
- 0, /* retr */
- 0, /* reti */
+ 0, /* retr_c */
+ 0, /* reti_c */
+ 0, /* retr_uc */
+ 0, /* reti_uc */
+ 0, /* retr_s */
+ 0, /* reti_s */
+ 0, /* retr_us */
+ 0, /* reti_us */
+ 0, /* retr_i */
+ 0, /* reti_i */
+ 0, /* retr_ui */
+ 0, /* reti_ui */
+ 0, /* retr_l */
+ 0, /* reti_l */
0, /* retval_c */
0, /* retval_uc */
0, /* retval_s */
10, /* bltr_f */
20, /* blti_f */
10, /* bler_f */
- 25, /* blei_f */
+ 22, /* blei_f */
12, /* beqr_f */
- 27, /* beqi_f */
+ 22, /* beqi_f */
10, /* bger_f */
- 25, /* bgei_f */
+ 22, /* bgei_f */
10, /* bgtr_f */
- 25, /* bgti_f */
+ 22, /* bgti_f */
13, /* bner_f */
- 28, /* bnei_f */
+ 25, /* bnei_f */
10, /* bunltr_f */
- 25, /* bunlti_f */
+ 23, /* bunlti_f */
10, /* bunler_f */
- 25, /* bunlei_f */
+ 23, /* bunlei_f */
10, /* buneqr_f */
- 25, /* buneqi_f */
+ 23, /* buneqi_f */
10, /* bunger_f */
- 25, /* bungei_f */
+ 23, /* bungei_f */
10, /* bungtr_f */
- 25, /* bungti_f */
+ 22, /* bungti_f */
10, /* bltgtr_f */
- 25, /* bltgti_f */
+ 22, /* bltgti_f */
10, /* bordr_f */
- 25, /* bordi_f */
+ 22, /* bordi_f */
10, /* bunordr_f */
- 25, /* bunordi_f */
+ 22, /* bunordi_f */
0, /* pushargr_f */
0, /* pushargi_f */
0, /* retr_f */
0, /* movi_d_ww */
0, /* movr_d_w */
0, /* movi_d_w */
- 9, /* bswapr_us */
- 6, /* bswapr_ui */
- 6, /* bswapr_ul */
- 11, /* casr */
- 16, /* casi */
+ 11, /* clo */
+ 5, /* clz */
+ 11, /* cto */
+ 5, /* ctz */
#endif /* __CYGWIN__ || _WIN32 */
# endif /* __X64_32 */
#endif /* __X64 */
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
jit_word_t i0, jit_int32_t r0, \
jit_float##size##_t *i1) \
{ \
- jit_word_t word; \
+ jit_word_t w; \
jit_int32_t reg = jit_get_reg(jit_class_fpr| \
jit_class_nospill); \
assert(jit_x87_reg_p(reg)); \
x87_movi_##type(rn(reg), i1); \
- word = x87_b##name##r_##type(i0, r0, rn(reg)); \
+ w = x87_b##name##r_##type(i0, r0, rn(reg)); \
jit_unget_reg(reg); \
- return (word); \
+ return (w); \
}
# define fopi(name) fpr_opi(name, f, 32)
# define fbopi(name) fpr_bopi(name, f, 32)
static void
_x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET();
#if defined(sun)
/* for the sake of passing test cases in x87 mode, otherwise only sse
* is supported */
static void
_x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET();
fldr(r1);
fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
ldxi(r0, _RBP_REGNO, CVT_OFFSET);
static void
_x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET();
stxi(CVT_OFFSET, _RBP_REGNO, r1);
# if __X32
fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
fldr(r0);
fucomipr(r1 + 1);
}
- jcc(code, i0);
- return (_jit->pc.w);
+ return (jcc(code, i0));
}
static jit_word_t
fldr(f0);
fucomipr(f1 + 1);
}
- jcc(code, i0);
- return (_jit->pc.w);
+ return (jcc(code, i0));
}
fopi(lt)
fldln2();
else {
if (_jitc->no_data) {
+ CHECK_CVT_OFFSET();
reg = jit_get_reg(jit_class_gpr);
movi(rn(reg), data.i);
stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
fldln2();
else {
if (_jitc->no_data) {
+ CHECK_CVT_OFFSET();
reg = jit_get_reg(jit_class_gpr);
#if __X32 || __X64_32
movi(rn(reg), data.ii[0]);
fldr(f1);
fucomipr(f2 + 1);
}
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_E, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
fldr(f1);
fucomipr(f2 + 1);
}
- jpes(0);
- jp_code = _jit->pc.w;
+ jp_code = jpes(0);
cc(X86_CC_NE, reg);
- patch_rel_char(jp_code, _jit->pc.w);
+ patch_at(jp_code, _jit->pc.w);
if (!rc)
xchgr(r0, reg);
}
static jit_word_t
_x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
jit_int32_t f0, f1;
jit_word_t jp_code;
if (r1 == _ST0_REGNO) f0 = r1, f1 = r0;
fldr(f0);
fucomipr(f1 + 1);
}
- jpes(0);
- jp_code = _jit->pc.w;
- jcc(X86_CC_E, i0);
- patch_rel_char(jp_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jpes(0);
+ w = jcc(X86_CC_E, i0);
+ patch_at(jp_code, _jit->pc.w);
+ return (w);
}
dbopi(eq)
dbopi(ge)
static jit_word_t
_x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
{
+ jit_word_t w;
jit_int32_t f0, f1;
jit_word_t jp_code;
jit_word_t jz_code;
fldr(f0);
fucomipr(f1 + 1);
}
- jpes(0);
- jp_code = _jit->pc.w;
- jzs(0);
- jz_code = _jit->pc.w;
- patch_rel_char(jp_code, _jit->pc.w);
- jmpi(i0);
- patch_rel_char(jz_code, _jit->pc.w);
- return (_jit->pc.w);
+ jp_code = jpes(0);
+ jz_code = jzs(0);
+ patch_at(jp_code, _jit->pc.w);
+ w = jmpi(i0);
+ patch_at(jz_code, _jit->pc.w);
+ return (w);
}
dbopi(ne)
dbopi(unlt)
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
#include <lightning/jit_private.h>
#if __X32
+# define CAN_RIP_ADDRESS 0
+# define address_p(i0) 1
# define jit_arg_reg_p(i) 0
# define jit_arg_f_reg_p(i) 0
-# define stack_framesize 20
-# define stack_adjust 12
-# define CVT_OFFSET -12
+/* callee save + 16 byte align
+ * align16(%ebp + %ebx + %esi + %edi) + (16 - 4) */
+# define stack_framesize 28
# define REAL_WORDSIZE 4
# define va_gp_increment 4
# define va_fp_increment 8
#else
+# if _WIN32 || __X64_32
+# define CAN_RIP_ADDRESS 0
+# else
+# define CAN_RIP_ADDRESS 1
+# endif
+# if __X64_32
+# define address_p(i0) ((jit_word_t)(i0) >= 0)
+# else
+# define address_p(i0) can_sign_extend_int_p(i0)
+# endif
# if __CYGWIN__ || _WIN32
# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4)
# define jit_arg_f_reg_p(i) jit_arg_reg_p(i)
+/* callee save + 16 byte align
+ * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */
# define stack_framesize 152
# define va_fp_increment 8
# else
# define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6)
# define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8)
+/* callee save + 16 byte align
+ * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */
# define stack_framesize 56
# define first_gp_argument rdi
# define first_gp_offset offsetof(jit_va_list_t, rdi)
# define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16)
# endif
# define va_gp_increment 8
-# define stack_adjust 8
-# define CVT_OFFSET -8
# define REAL_WORDSIZE 8
#endif
+#define CVT_OFFSET _jitc->function->cvt_offset
+/* Reserve the slot addressed by CVT_OFFSET on first use: when the
+ * current function has no cvt_offset yet, allocate
+ * sizeof(jit_float64_t) bytes with jit_allocai() and set
+ * _jitc->again.
+ * NOTE(review): _jitc->again presumably makes the emitter restart the
+ * function (see restart_function) -- confirm. */
+#define CHECK_CVT_OFFSET() \
+ do { \
+ if (!_jitc->function->cvt_offset) { \
+ _jitc->again = 1; \
+ _jitc->function->cvt_offset = \
+ jit_allocai(sizeof(jit_float64_t)); \
+ } \
+ } while (0)
/*
* Types
/*
* Prototypes
*/
+#define compute_framesize() _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
#define patch(instr, node) _patch(_jit, instr, node)
static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
#define sse_from_x87_f(r0, r1) _sse_from_x87_f(_jit, r0, r1)
{ _NOREG, "<none>" },
};
+static jit_int32_t iregs[] = { /* Integer registers scanned by
+ * _compute_framesize(): every entry that is set in the function
+ * regset adds REAL_WORDSIZE bytes to the frame. The list is chosen
+ * per ABI by the preprocessor conditions below. */
+#if __X32
+ _RBX, _RSI, _RDI,
+#elif (__CYGWIN__ || _WIN32)
+ _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15,
+#else
+ _R15, _R14, _R13, _R12, _RBX,
+#endif
+};
+/* Float registers scanned by _compute_framesize() on 64 bit
+ * Cygwin/Windows builds only: every entry that is set in the function
+ * regset adds sizeof(jit_float64_t) bytes to the frame. */
+#if __X64 && (__CYGWIN__ || _WIN32)
+static jit_int32_t fregs[] = {
+ _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
+};
+#endif
+
/*
* Implementation
*/
jit_get_cpu(void)
{
union {
+ /* eax=7 and ecx=0 */
+ struct {
+ jit_uword_t fsgsbase : 1;
+ jit_uword_t IA32_TSC_ADJUST : 1;
+ jit_uword_t sgx : 1;
+ jit_uword_t bmi1 : 1;
+ jit_uword_t hle : 1;
+ jit_uword_t avx2 : 1;
+ jit_uword_t FDP_EXCPTN_ONLY : 1;
+ jit_uword_t smep : 1;
+ jit_uword_t bmi2 : 1;
+ jit_uword_t erms : 1;
+ jit_uword_t invpcid : 1;
+ jit_uword_t rtm : 1;
+ jit_uword_t rdt_m_pqm : 1;
+ jit_uword_t dep_FPU_CS_DS : 1;
+ jit_uword_t mpx : 1;
+ jit_uword_t rdt_a_pqe : 1;
+ jit_uword_t avx512_f : 1;
+ jit_uword_t avx512_dq : 1;
+ jit_uword_t rdseed : 1;
+ jit_uword_t adx : 1;
+ jit_uword_t smap : 1;
+ jit_uword_t avx512_ifma : 1;
+ jit_uword_t __reserved0 : 1;
+ jit_uword_t clflushopt : 1;
+ jit_uword_t clwb : 1;
+ jit_uword_t pt : 1;
+ jit_uword_t avx512_pf : 1;
+ jit_uword_t avx512_er : 1;
+ jit_uword_t avx512_cd : 1;
+ jit_uword_t sha : 1;
+ jit_uword_t avx512_bw : 1;
+ jit_uword_t avx512_vl : 1;
+ } bits;
+ jit_uword_t cpuid;
+ } ebx;
+ union {
+ /* eax=1 */
struct {
jit_uint32_t sse3 : 1;
jit_uint32_t pclmulqdq : 1;
jit_uword_t cpuid;
} ecx;
union {
+ /* eax=1 */
struct {
jit_uint32_t fpu : 1;
jit_uint32_t vme : 1;
#if __X32
int ac, flags;
#endif
- jit_uword_t eax, ebx;
+ jit_uword_t eax;
#if __X32
/* adapted from glibc __sysconf */
#else
__asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
#endif
- : "=a" (eax), "=r" (ebx),
+ : "=a" (eax), "=r" (ebx.cpuid),
"=c" (ecx.cpuid), "=d" (edx.cpuid)
: "0" (1));
jit_cpu.aes = ecx.bits.aes;
jit_cpu.avx = ecx.bits.avx;
+ /* query %eax = 7 and ecx = 0 function */
+#if __X64
+ __asm__ volatile ("cpuid"
+ : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx)
+ : "a" (7), "c" (0));
+#endif
+ jit_cpu.adx = ebx.bits.adx;
+
+
/* query %eax = 0x80000001 function */
#if __X64
# if __X64_32
# else
__asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
# endif
- : "=a" (eax), "=r" (ebx),
+ : "=a" (eax), "=r" (ebx.cpuid),
"=c" (ecx.cpuid), "=d" (edx.cpuid)
: "0" (0x80000001));
- jit_cpu.lahf = ecx.cpuid & 1;
+ jit_cpu.lahf = !!(ecx.cpuid & 1);
+ jit_cpu.abm = !!(ecx.cpuid & 32);
#endif
}
_jitc->functions.length += 16;
}
_jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
- _jitc->function->self.size = stack_framesize;
+ /* One extra stack slot for implicit saved returned address */
+ _jitc->function->self.size = stack_framesize + REAL_WORDSIZE;
_jitc->function->self.argi = _jitc->function->self.argf =
_jitc->function->self.aoff = _jitc->function->self.alen = 0;
- /* sse/x87 conversion */
- _jitc->function->self.aoff = CVT_OFFSET;
+ _jitc->function->cvt_offset = 0;
+#if __X64 && (__CYGWIN__ || _WIN32)
+ /* force framepointer */
+ jit_check_frame();
+#endif
_jitc->function->self.call = jit_call_default;
jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
_jitc->reglen * sizeof(jit_int32_t));
_jit_allocai(jit_state_t *_jit, jit_int32_t length)
{
assert(_jitc->function);
+ jit_check_frame();
+#if __X32
+ /* Stack is 4 bytes aligned but jit functions keep it 8 bytes aligned.
+ * Called functions have 16 byte aligned stack. */
+ if (!_jitc->function->self.aoff)
+ _jitc->function->self.aoff = -4;
+#endif
switch (length) {
case 0: case 1: break;
case 2: _jitc->function->self.aoff &= -2; break;
}
void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
-{
- jit_inc_synth_w(retr, u);
- /* movr(%ret, %ret) would be optimized out */
- if (JIT_RET != u)
- jit_movr(JIT_RET, u);
- /* explicitly tell it is live */
- jit_live(JIT_RET);
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
+{
+ jit_code_inc_synth_w(code, u); /* the synthesized node now uses the code passed by the caller instead of a fixed retr */
+ jit_movr(JIT_RET, u); /* always emitted now: the old JIT_RET != u guard and the explicit jit_live(JIT_RET) are dropped */
jit_ret();
jit_dec_synth();
}
void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
- jit_inc_synth_w(reti, u);
+ jit_code_inc_synth_w(code, u);
jit_movi(JIT_RET, u);
jit_ret();
jit_dec_synth();
jit_bool_t
_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
{
- if (u->code == jit_code_arg)
+ if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
return (jit_arg_reg_p(u->u.w));
assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
return (jit_arg_f_reg_p(u->u.w));
_jit_ellipsis(jit_state_t *_jit)
{
jit_inc_synth(ellipsis);
+ jit_check_frame();
if (_jitc->prepare) {
jit_link_prepare();
/* Remember that a varargs function call is being constructed. */
}
jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
{
jit_node_t *node;
jit_int32_t offset;
assert(_jitc->function);
assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+ assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
#if __X64
if (jit_arg_reg_p(_jitc->function->self.argi)) {
offset = _jitc->function->self.argi++;
{
offset = _jitc->function->self.size;
_jitc->function->self.size += REAL_WORDSIZE;
+ jit_check_frame();
}
- node = jit_new_node_ww(jit_code_arg, offset,
+ node = jit_new_node_ww(code, offset,
++_jitc->function->self.argn);
jit_link_prolog();
return (node);
{
offset = _jitc->function->self.size;
_jitc->function->self.size += REAL_WORDSIZE;
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_f, offset,
++_jitc->function->self.argn);
{
offset = _jitc->function->self.size;
_jitc->function->self.size += sizeof(jit_float64_t);
+ jit_check_frame();
}
node = jit_new_node_ww(jit_code_arg_d, offset,
++_jitc->function->self.argn);
void
_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c); /* replaces the former exact jit_code_arg comparison */
jit_inc_synth_wp(getarg_c, u, v);
#if __X64
if (jit_arg_reg_p(v->u.w))
jit_extr_c(u, JIT_RA0 - v->u.w);
else
#endif
- jit_ldxi_c(u, _RBP, v->u.w);
+ { /* argument is not held in a register: read it at offset v->u.w from _RBP */
+ jit_node_t *node = jit_ldxi_c(u, _RBP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably queues the load on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
void
_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_c); /* replaces the former exact jit_code_arg comparison */
jit_inc_synth_wp(getarg_uc, u, v);
#if __X64
if (jit_arg_reg_p(v->u.w))
jit_extr_uc(u, JIT_RA0 - v->u.w);
else
#endif
- jit_ldxi_uc(u, _RBP, v->u.w);
+ { /* argument is not held in a register: read it at offset v->u.w from _RBP */
+ jit_node_t *node = jit_ldxi_uc(u, _RBP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably queues the load on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
void
_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s); /* replaces the former exact jit_code_arg comparison */
jit_inc_synth_wp(getarg_s, u, v);
#if __X64
if (jit_arg_reg_p(v->u.w))
jit_extr_s(u, JIT_RA0 - v->u.w);
else
#endif
- jit_ldxi_s(u, _RBP, v->u.w);
+ { /* argument is not held in a register: read it at offset v->u.w from _RBP */
+ jit_node_t *node = jit_ldxi_s(u, _RBP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably queues the load on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
void
_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_s); /* replaces the former exact jit_code_arg comparison */
jit_inc_synth_wp(getarg_us, u, v);
#if __X64
if (jit_arg_reg_p(v->u.w))
jit_extr_us(u, JIT_RA0 - v->u.w);
else
#endif
- jit_ldxi_us(u, _RBP, v->u.w);
+ { /* argument is not held in a register: read it at offset v->u.w from _RBP */
+ jit_node_t *node = jit_ldxi_us(u, _RBP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably queues the load on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
void
_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i);
jit_inc_synth_wp(getarg_i, u, v);
#if __X64
if (jit_arg_reg_p(v->u.w)) {
}
else
#endif
- jit_ldxi_i(u, _RBP, v->u.w);
+ {
+ jit_node_t *node = jit_ldxi_i(u, _RBP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
void
_jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_i); /* replaces the former exact jit_code_arg comparison */
jit_inc_synth_wp(getarg_ui, u, v);
if (jit_arg_reg_p(v->u.w))
jit_extr_ui(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_ui(u, _RBP, v->u.w);
+ else { /* argument is not held in a register: read it at offset v->u.w from _RBP */
+ jit_node_t *node = jit_ldxi_ui(u, _RBP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably queues the load on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
void
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
{
- assert(v->code == jit_code_arg);
+ assert_arg_type(v->code, jit_code_arg_l); /* replaces the former exact jit_code_arg comparison */
jit_inc_synth_wp(getarg_l, u, v);
if (jit_arg_reg_p(v->u.w))
jit_movr(u, JIT_RA0 - v->u.w);
- else
- jit_ldxi_l(u, _RBP, v->u.w);
+ else { /* argument is not held in a register: read it at offset v->u.w from _RBP */
+ jit_node_t *node = jit_ldxi_l(u, _RBP, v->u.w);
+ jit_link_alist(node); /* NOTE(review): presumably queues the load on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
#endif
void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
{
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargr, u, v);
+ assert_putarg_type(code, v->code); /* checks the caller-supplied code against the argument node's code */
+ jit_code_inc_synth_wp(code, u, v); /* the synthesized node uses the caller-supplied code instead of a fixed putargr */
#if __X64
if (jit_arg_reg_p(v->u.w))
jit_movr(JIT_RA0 - v->u.w, u);
else
#endif
- jit_stxi(v->u.w, _RBP, u);
+ { /* argument is not held in a register: write u at offset v->u.w from _RBP */
+ jit_node_t *node = jit_stxi(v->u.w, _RBP, u);
+ jit_link_alist(node); /* NOTE(review): presumably queues the store on function->alist so _patch_alist() can adjust its offset -- confirm */
+ }
jit_dec_synth();
}
void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
{
jit_int32_t regno;
- assert(v->code == jit_code_arg);
- jit_inc_synth_wp(putargi, u, v);
+ assert_putarg_type(code, v->code);
+ jit_code_inc_synth_wp(code, u, v);
#if __X64
if (jit_arg_reg_p(v->u.w))
jit_movi(JIT_RA0 - v->u.w, u);
else
#endif
{
+ jit_node_t *node;
regno = jit_get_reg(jit_class_gpr);
jit_movi(regno, u);
- jit_stxi(v->u.w, _RBP, regno);
+ node = jit_stxi(v->u.w, _RBP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_movr_f(u, _XMM0 - v->u.w);
else
#endif
- jit_ldxi_f(u, _RBP, v->u.w);
+ {
+ jit_node_t *node = jit_ldxi_f(u, _RBP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
assert(v->code == jit_code_arg_f);
jit_inc_synth_wp(putargr_f, u, v);
#if __X64
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_f_reg_p(v->u.w))
jit_movr_f(_XMM0 - v->u.w, u);
else
#endif
- jit_stxi_f(v->u.w, _RBP, u);
+ {
+ jit_node_t *node = jit_stxi_f(v->u.w, _RBP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
assert(v->code == jit_code_arg_f);
jit_inc_synth_fp(putargi_f, u, v);
#if __X64
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_f_reg_p(v->u.w))
jit_movi_f(_XMM0 - v->u.w, u);
else
#endif
{
- regno = jit_get_reg(jit_class_gpr);
+ jit_node_t *node;
+ regno = jit_get_reg(jit_class_fpr);
jit_movi_f(regno, u);
- jit_stxi_f(v->u.w, _RBP, regno);
+ node = jit_stxi_f(v->u.w, _RBP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
jit_movr_d(u, _XMM0 - v->u.w);
else
#endif
- jit_ldxi_d(u, _RBP, v->u.w);
+ {
+ jit_node_t *node = jit_ldxi_d(u, _RBP, v->u.w);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
assert(v->code == jit_code_arg_d);
jit_inc_synth_wp(putargr_d, u, v);
#if __X64
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_f_reg_p(v->u.w))
jit_movr_d(_XMM0 - v->u.w, u);
else
#endif
- jit_stxi_d(v->u.w, _RBP, u);
+ {
+ jit_node_t *node = jit_stxi_d(v->u.w, _RBP, u);
+ jit_link_alist(node);
+ }
jit_dec_synth();
}
assert(v->code == jit_code_arg_d);
jit_inc_synth_dp(putargi_d, u, v);
#if __X64
- if (jit_arg_reg_p(v->u.w))
+ if (jit_arg_f_reg_p(v->u.w))
jit_movi_d(_XMM0 - v->u.w, u);
else
#endif
{
- regno = jit_get_reg(jit_class_gpr);
+ jit_node_t *node;
+ regno = jit_get_reg(jit_class_fpr);
jit_movi_d(regno, u);
- jit_stxi_d(v->u.w, _RBP, regno);
+ node = jit_stxi_d(v->u.w, _RBP, regno);
+ jit_link_alist(node);
jit_unget_reg(regno);
}
jit_dec_synth();
}
void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
{
assert(_jitc->function);
- jit_inc_synth_w(pushargr, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
#if __X64
if (jit_arg_reg_p(_jitc->function->call.argi)) {
{
jit_stxi(_jitc->function->call.size, _RSP, u);
_jitc->function->call.size += REAL_WORDSIZE;
+ jit_check_frame();
}
jit_dec_synth();
}
void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
{
jit_int32_t regno;
assert(_jitc->function);
- jit_inc_synth_w(pushargi, u);
+ jit_code_inc_synth_w(code, u);
jit_link_prepare();
#if __X64
if (jit_arg_reg_p(_jitc->function->call.argi)) {
jit_stxi(_jitc->function->call.size, _RSP, regno);
_jitc->function->call.size += REAL_WORDSIZE;
jit_unget_reg(regno);
+ jit_check_frame();
}
jit_dec_synth();
}
{
jit_stxi_f(_jitc->function->call.size, _RSP, u);
_jitc->function->call.size += REAL_WORDSIZE;
+ jit_check_frame();
}
jit_dec_synth();
}
jit_stxi_f(_jitc->function->call.size, _RSP, regno);
_jitc->function->call.size += REAL_WORDSIZE;
jit_unget_reg(regno);
+ jit_check_frame();
}
jit_dec_synth();
}
{
jit_stxi_d(_jitc->function->call.size, _RSP, u);
_jitc->function->call.size += sizeof(jit_float64_t);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_stxi_d(_jitc->function->call.size, _RSP, regno);
_jitc->function->call.size += sizeof(jit_float64_t);
jit_unget_reg(regno);
+ jit_check_frame();
}
jit_dec_synth();
}
jit_int32_t reg;
jit_node_t *call;
assert(_jitc->function);
+ jit_check_frame();
reg = r0;
jit_inc_synth_w(finishr, r0);
if (_jitc->function->self.alen < _jitc->function->call.size)
jit_node_t *
_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
{
-#if __X64
- jit_int32_t reg;
-#endif
jit_node_t *node;
assert(_jitc->function);
+ jit_check_frame();
jit_inc_synth_w(finishi, (jit_word_t)i0);
if (_jitc->function->self.alen < _jitc->function->call.size)
_jitc->function->self.alen = _jitc->function->call.size;
#if __X64
- /* FIXME preventing %rax allocation is good enough, but for consistency
- * it should automatically detect %rax is dead, in case it has run out
- * registers, and not save/restore it, what would be wrong if using the
- * the return value, otherwise, just a needless noop */
- /* >> prevent %rax from being allocated as the function pointer */
- jit_regset_setbit(&_jitc->regarg, _RAX);
- reg = jit_get_reg(jit_class_gpr);
- node = jit_movi(reg, (jit_word_t)i0);
- jit_finishr(reg);
- jit_unget_reg(reg);
- /* << prevent %rax from being allocated as the function pointer */
- jit_regset_clrbit(&_jitc->regarg, _RAX);
-#else
+# if !(__CYGWIN__ || _WIN32)
+ if (_jitc->function->call.call & jit_call_varargs) {
+ if (_jitc->function->call.argf)
+ jit_movi(_RAX, _jitc->function->call.argf);
+ else
+ jit_movi(_RAX, 0);
+ jit_live(_RAX);
+ }
+# endif
+#endif
node = jit_calli(i0);
node->v.w = _jitc->function->call.argi;
node->w.w = _jitc->function->call.argf;
-#endif
_jitc->function->call.argi = _jitc->function->call.argf =
_jitc->function->call.size = 0;
_jitc->prepare = 0;
struct {
jit_node_t *node;
jit_word_t word;
+ jit_function_t func;
#if DEVEL_DISASSEMBLER
jit_word_t prevw;
#endif
if ((word = _jit->pc.w & (node->u.w - 1)))
nop(node->u.w - word);
break;
- case jit_code_note: case jit_code_name:
+ case jit_code_skip:
+ nop(node->u.w);
+ break;
+ case jit_code_note: case jit_code_name:
node->u.w = _jit->pc.w;
break;
case jit_code_label:
case_rrw(rsh, _u);
case_rr(neg,);
case_rr(com,);
+ case_rr(clo,);
+ case_rr(clz,);
+ case_rr(cto,);
+ case_rr(ctz,);
case_rrr(lt,);
case_rrw(lt,);
case_rrr(lt, _u);
else {
assert(temp->code == jit_code_label ||
temp->code == jit_code_epilog);
- word = movi_p(rn(node->u.w), node->v.w);
+#if CAN_RIP_ADDRESS
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if ((jit_int32_t)word == word)
+ word = movi(rn(node->u.w), _jit->pc.w);
+ else
+#endif
+ word = movi_p(rn(node->u.w), node->v.w);
patch(word, node);
}
}
case_bff(unord, _d);
case_bfw(unord, _d, 64);
case jit_code_jmpr:
+ jit_check_frame();
jmpr(rn(node->u.w));
break;
case jit_code_jmpi:
if (temp->flag & jit_flag_patch)
jmpi(temp->u.w);
else {
- word = jmpi_p(_jit->pc.w);
+#if __X64
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if ((jit_int32_t)word == word)
+ word = jmpi(_jit->pc.w);
+ else
+#endif
+ word = jmpi_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
jmpi(node->u.w);
+ }
break;
case jit_code_callr:
+ jit_check_frame();
callr(rn(node->u.w));
break;
case jit_code_calli:
if (temp->flag & jit_flag_patch)
calli(temp->u.w);
else {
- word = calli_p(_jit->pc.w);
+#if __X64
+ word = _jit->code.length -
+ (_jit->pc.uc - _jit->code.ptr);
+ if ((jit_int32_t)word == word)
+ word = calli(_jit->pc.w);
+ else
+#endif
+ word = calli_p(_jit->pc.w);
patch(word, node);
}
}
- else
+ else {
+ jit_check_frame();
calli(node->u.w);
+ }
break;
case jit_code_prolog:
_jitc->function = _jitc->functions.ptr + node->w.w;
undo.node = node;
undo.word = _jit->pc.w;
+ memcpy(&undo.func, _jitc->function, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
undo.prevw = prevw;
#endif
undo.patch_offset = _jitc->patches.offset;
restart_function:
+ compute_framesize();
+ patch_alist(0);
_jitc->again = 0;
prolog(node);
break;
temp->flag &= ~jit_flag_patch;
node = undo.node;
_jit->pc.w = undo.word;
+ /* undo.func.self.aoff and undo.func.regset should not
+ * be undone, as they will be further updated, and are
+ * the reason of the undo. */
+ undo.func.self.aoff = _jitc->function->frame +
+ _jitc->function->self.aoff;
+ undo.func.need_frame = _jitc->function->need_frame;
+ jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+ /* allocar information also does not need to be undone */
+ undo.func.aoffoff = _jitc->function->aoffoff;
+ undo.func.allocar = _jitc->function->allocar;
+ /* real stack framesize is not in the jit_function_t,
+ * if it were, would need to not be undone */
+ /* cvt_offset must also not be undone */
+ undo.func.cvt_offset = _jitc->function->cvt_offset;
+ /* this will be recomputed but undo anyway to have it
+ * better self documented.*/
+ undo.func.need_stack = _jitc->function->need_stack;
+ memcpy(_jitc->function, &undo.func, sizeof(undo.func));
#if DEVEL_DISASSEMBLER
prevw = undo.prevw;
#endif
_jitc->patches.offset = undo.patch_offset;
+ patch_alist(1);
goto restart_function;
}
if (node->link &&
case jit_code_live: case jit_code_ellipsis:
case jit_code_va_push:
case jit_code_allocai: case jit_code_allocar:
- case jit_code_arg:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i:
+# if __WORDSIZE == 64
+ case jit_code_arg_l:
+# endif
case jit_code_arg_f: case jit_code_arg_d:
case jit_code_va_end:
case jit_code_ret:
- case jit_code_retr: case jit_code_reti:
+ case jit_code_retr_c: case jit_code_reti_c:
+ case jit_code_retr_uc: case jit_code_reti_uc:
+ case jit_code_retr_s: case jit_code_reti_s:
+ case jit_code_retr_us: case jit_code_reti_us:
+ case jit_code_retr_i: case jit_code_reti_i:
+#if __WORDSIZE == 64
+ case jit_code_retr_ui: case jit_code_reti_ui:
+ case jit_code_retr_l: case jit_code_reti_l:
+#endif
case jit_code_retr_f: case jit_code_reti_f:
case jit_code_retr_d: case jit_code_reti_d:
case jit_code_getarg_c: case jit_code_getarg_uc:
case jit_code_getarg_ui: case jit_code_getarg_l:
#endif
case jit_code_getarg_f: case jit_code_getarg_d:
- case jit_code_putargr: case jit_code_putargi:
+ case jit_code_putargr_c: case jit_code_putargi_c:
+ case jit_code_putargr_uc: case jit_code_putargi_uc:
+ case jit_code_putargr_s: case jit_code_putargi_s:
+ case jit_code_putargr_us: case jit_code_putargi_us:
+ case jit_code_putargr_i: case jit_code_putargi_i:
+#if __WORDSIZE == 64
+ case jit_code_putargr_ui: case jit_code_putargi_ui:
+ case jit_code_putargr_l: case jit_code_putargi_l:
+#endif
case jit_code_putargr_f: case jit_code_putargi_f:
case jit_code_putargr_d: case jit_code_putargi_d:
- case jit_code_pushargr: case jit_code_pushargi:
+ case jit_code_pushargr_c: case jit_code_pushargi_c:
+ case jit_code_pushargr_uc: case jit_code_pushargi_uc:
+ case jit_code_pushargr_s: case jit_code_pushargi_s:
+ case jit_code_pushargr_us: case jit_code_pushargi_us:
+ case jit_code_pushargr_i: case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+ case jit_code_pushargr_ui: case jit_code_pushargi_ui:
+ case jit_code_pushargr_l: case jit_code_pushargi_l:
+#endif
case jit_code_pushargr_f: case jit_code_pushargi_f:
case jit_code_pushargr_d: case jit_code_pushargi_d:
case jit_code_retval_c: case jit_code_retval_uc:
for (offset = 0; offset < _jitc->patches.offset; offset++) {
node = _jitc->patches.ptr[offset].node;
word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
- patch_at(node, _jitc->patches.ptr[offset].inst, word);
+ patch_at(_jitc->patches.ptr[offset].inst, word);
}
jit_flush(_jit->code.ptr, _jit->pc.uc);
sse_stxi_d(i0, rn(r0), rn(r1));
}
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+ jit_int32_t reg; /* index into iregs[] and, where defined, fregs[] */
+ /* Recompute _jitc->framesize from the registers set in the
+ * current function's regset.
+ * Save stack pointer in first slot */
+ _jitc->framesize = REAL_WORDSIZE;
+ for (reg = 0; reg < jit_size(iregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+ _jitc->framesize += REAL_WORDSIZE;
+ /* one sizeof(jit_float64_t) slot per fregs[] entry set in the
+ * regset, only on 64 bit Cygwin/Windows builds */
+#if __X64 && (__CYGWIN__ || _WIN32)
+ for (reg = 0; reg < jit_size(fregs); reg++)
+ if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+ _jitc->framesize += sizeof(jit_float64_t);
+#endif
+ /* Make sure functions called have a 16 byte aligned stack:
+ * round up to a multiple of 16, then add 16 - REAL_WORDSIZE, the
+ * same "+ (16 - N)" term used in the stack_framesize comments.
+ * NOTE(review): the extra term presumably accounts for the return
+ * address pushed by the call -- confirm. */
+ _jitc->framesize = (_jitc->framesize + 15) & -16;
+ _jitc->framesize += 16 - REAL_WORDSIZE;
+}
+
static void
_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
{
static void
_sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET(); /* CVT_OFFSET is now per function: allocate the slot on first use; r1 is then stored there with x87_stxi_f and reloaded into r0 with sse_ldxi_f */
x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
}
static void
_sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET(); /* CVT_OFFSET is now per function: allocate the slot on first use; r1 is then stored there with x87_stxi_d and reloaded into r0 with sse_ldxi_d */
x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
}
static void
_x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET(); /* CVT_OFFSET is now per function: allocate the slot on first use; r1 is then stored there with sse_stxi_f and reloaded into r0 with x87_ldxi_f */
sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
}
static void
_x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
{
+ CHECK_CVT_OFFSET(); /* CVT_OFFSET is now per function: allocate the slot on first use; r1 is then stored there with sse_stxi_d and reloaded into r0 with x87_ldxi_d */
sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
}
/*
- * Copyright (C) 2012-2022 Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
for (regno = 0; regno < _jitc->reglen; regno++) {
if ((jit_class(_rvs[regno].spec) & spec) == spec &&
!jit_regset_tstbit(&_jitc->regarg, regno) &&
- !jit_regset_tstbit(&_jitc->reglive, regno))
+ !jit_regset_tstbit(&_jitc->reglive, regno)) {
+ if (jit_regset_tstbit(&_jitc->regmask, regno)) {
+ /* search further, attempting to find a truly known
+ * free register, not just one in unknown state. */
+ jit_int32_t regfree;
+
+ for (regfree = regno + 1;
+ regfree < _jitc->reglen; regfree++) {
+ if ((jit_class(_rvs[regfree].spec) & spec) == spec &&
+ !jit_regset_tstbit(&_jitc->regarg, regfree) &&
+ !jit_regset_tstbit(&_jitc->reglive, regfree) &&
+ !jit_regset_tstbit(&_jitc->regmask, regfree)) {
+ regno = regfree;
+ break;
+ }
+ }
+ }
goto regarg;
+ }
}
/* search for a register matching spec that is not an argument
jit_regset_new(&_jitc->regsav);
jit_regset_new(&_jitc->reglive);
jit_regset_new(&_jitc->regmask);
+ jit_regset_new(&_jitc->explive);
jit_init();
mask = 0;
break;
case jit_code_live: case jit_code_va_end:
- case jit_code_retr: case jit_code_retr_f: case jit_code_retr_d:
- case jit_code_pushargr: case jit_code_pushargr_f:
+ case jit_code_retr_c: case jit_code_retr_uc:
+ case jit_code_retr_s: case jit_code_retr_us:
+ case jit_code_retr_i: case jit_code_retr_ui:
+ case jit_code_retr_l:
+ case jit_code_retr_f: case jit_code_retr_d:
+ case jit_code_pushargr_c:
+ case jit_code_pushargr_uc:
+ case jit_code_pushargr_s:
+ case jit_code_pushargr_us:
+ case jit_code_pushargr_i:
+ case jit_code_pushargr_ui:
+ case jit_code_pushargr_l:
+ case jit_code_pushargr_f:
case jit_code_pushargr_d:
case jit_code_finishr: /* synthesized will set jit_cc_a0_jmp */
mask = jit_cc_a0_reg;
break;
- case jit_code_align: case jit_code_reti: case jit_code_pushargi:
- case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */
+ case jit_code_align: case jit_code_skip:
+ case jit_code_reti_c: case jit_code_reti_uc:
+ case jit_code_reti_s: case jit_code_reti_us:
+ case jit_code_reti_i: case jit_code_reti_ui:
+ case jit_code_reti_l:
+ case jit_code_pushargi_c:
+ case jit_code_pushargi_uc:
+ case jit_code_pushargi_s:
+ case jit_code_pushargi_us:
+ case jit_code_pushargi_i:
+ case jit_code_pushargi_ui:
+ case jit_code_pushargi_l:
+ case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */
mask = jit_cc_a0_int;
break;
case jit_code_reti_f: case jit_code_pushargi_f:
case jit_code_allocai:
mask = jit_cc_a0_int|jit_cc_a1_int;
break;
- case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d:
+ case jit_code_arg_c: case jit_code_arg_s:
+ case jit_code_arg_i: case jit_code_arg_l:
+ case jit_code_arg_f: case jit_code_arg_d:
mask = jit_cc_a0_int|jit_cc_a0_arg;
break;
case jit_code_calli: case jit_code_jmpi:
case jit_code_getarg_f: case jit_code_getarg_d:
mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg;
break;
- case jit_code_putargr: case jit_code_putargr_f:
- case jit_code_putargr_d:
+ case jit_code_putargr_c:case jit_code_putargr_uc:
+ case jit_code_putargr_s:case jit_code_putargr_us:
+ case jit_code_putargr_i:case jit_code_putargr_ui:
+ case jit_code_putargr_l:
+ case jit_code_putargr_f:case jit_code_putargr_d:
mask = jit_cc_a0_reg|jit_cc_a1_arg;
break;
- case jit_code_putargi:
+ case jit_code_putargi_c:case jit_code_putargi_uc:
+ case jit_code_putargi_s:case jit_code_putargi_us:
+ case jit_code_putargi_i:case jit_code_putargi_ui:
+ case jit_code_putargi_l:
mask = jit_cc_a0_int|jit_cc_a1_arg;
break;
case jit_code_putargi_f:
case jit_code_negr_d: case jit_code_absr_d: case jit_code_sqrtr_d:
case jit_code_movr_d: case jit_code_extr_d: case jit_code_extr_f_d:
case jit_code_ldr_d:
+ case jit_code_clor: case jit_code_clzr:
+ case jit_code_ctor: case jit_code_ctzr:
case jit_code_movr_w_f: case jit_code_movr_f_w:
case jit_code_movr_w_d: case jit_code_movr_d_w:
case jit_code_va_arg: case jit_code_va_arg_d:
* at the start of a basic block */
for (offset = 0; offset < _jitc->blocks.offset; offset++) {
block = _jitc->blocks.ptr + offset;
- if (!block->label || block->label->code == jit_code_epilog)
+ if (!block->label)
continue;
+ if (block->label->code == jit_code_epilog) {
+ jit_regset_setbit(&block->reglive, JIT_RET);
+ jit_regset_setbit(&block->reglive, JIT_FRET);
+ jit_regset_com(&block->regmask, &block->reglive);
+ continue;
+ }
jit_setup(block);
}
}
}
while (todo);
- return (1);
+ return (todo);
}
static void
jit_node_t *node;
jit_block_t *block;
jit_word_t offset;
+ jit_regset_t regmask;
todo = 0;
_jitc->function = NULL;
if (simplify())
todo = 1;
- /* Figure out labels that are only reached with a jump
- * and is required to do a simple redundant_store removal
- * on jit_beqi below */
+ jit_regset_set_ui(®mask, 0);
+ for (offset = 0; offset < _jitc->reglen; offset++) {
+ if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) &&
+ (jit_class(_rvs[offset].spec) & jit_class_sav) == jit_class_sav)
+ jit_regset_setbit(®mask, offset);
+ }
+
+ /* Figure out labels that are only reached with a jump */
jump = 1;
for (node = _jitc->head; node; node = node->next) {
switch (node->code) {
case jit_code_label:
- if (!jump)
+ if (!jump) {
node->flag |= jit_flag_head;
+ if (!node->link) {
+ /* Block is dead code or only reachable through
+ * indirect jumps. In that case, must assume
+ * all callee save registers are live. */
+ block = _jitc->blocks.ptr + node->v.w;
+ jit_regset_ior(&block->reglive,
+ &block->reglive, ®mask);
+ /* Cleanup regmask */
+ block_update_set(block, block);
+ }
+ }
break;
case jit_code_jmpi: case jit_code_jmpr:
case jit_code_epilog:
case jit_code_label: case jit_code_prolog: case jit_code_epilog:
block = _jitc->blocks.ptr + node->v.w;
jit_regset_set(&_jitc->reglive, &block->reglive);
+ jit_regset_set_ui(&_jitc->explive, 0);
+ break;
+ case jit_code_live:
+ jit_regset_setbit(&_jitc->explive, node->u.w);
break;
case jit_code_callr:
value = jit_regno(node->u.w);
else
jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
}
+ /* Prevent incorrect detection of running out of registers
+ * if the jump will need to be patched, and all registers have
+ * been used in the current block. */
+ if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) {
+ jit_node_t *label = node->u.n;
+ jit_block_t *block = _jitc->blocks.ptr + label->v.w;
+ jit_regset_set(&_jitc->reglive, &block->reglive);
+ jit_regset_set(&_jitc->regmask, &block->regmask);
+ if (jit_regset_set_p(&_jitc->explive)) {
+ jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive);
+ jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive);
+ }
+ }
}
void
#else
if (!_jit->user_code) {
mmap_prot = PROT_READ | PROT_WRITE;
-#if !__OpenBSD__
+#if !(__OpenBSD__ || __APPLE__)
mmap_prot |= PROT_EXEC;
#endif
#if __NetBSD__
# endif
#else
_jit->code.ptr = mmap(NULL, length,
- PROT_EXEC | PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+ mmap_prot, mmap_flags, mmap_fd, 0);
#endif
assert(_jit->code.ptr != MAP_FAILED);
assert(result == 0);
}
if (!_jit->user_code) {
- length = _jit->pc.uc - _jit->code.ptr;
+ _jit->code.protected = _jit->pc.uc - _jit->code.ptr;
# if __riscv && __WORDSIZE == 64
/* FIXME should start adding consts at a page boundary */
- length -= _jitc->consts.hash.count * sizeof(jit_word_t);
+ _jit->code.protected -= _jitc->consts.hash.count * sizeof(jit_word_t);
# endif
- result = mprotect(_jit->code.ptr, length, PROT_READ | PROT_EXEC);
+ result = mprotect(_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC);
assert(result == 0);
}
#endif /* HAVE_MMAP */
return (NULL);
}
+void
+_jit_protect(jit_state_t *_jit)
+{ /* Set the first _jit->code.protected bytes of the code buffer to
+ * PROT_READ | PROT_EXEC with mprotect(). Nothing is done for user
+ * supplied code buffers; without HAVE_MMAP a user supplied buffer
+ * is asserted. */
+#if !HAVE_MMAP
+ assert (_jit->user_code);
+#else
+ int result; /* mprotect() return value, only checked by the assert */
+ if (_jit->user_code) return;
+ result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC);
+ assert (result == 0);
+#endif
+}
+/* Set the first _jit->code.protected bytes of the code buffer to
+ * PROT_READ | PROT_WRITE with mprotect(), i.e. writable and without
+ * the exec permission. Nothing is done for user supplied code
+ * buffers; without HAVE_MMAP a user supplied buffer is asserted. */
+void
+_jit_unprotect(jit_state_t *_jit)
+{
+#if !HAVE_MMAP
+ assert (_jit->user_code);
+#else
+ int result; /* mprotect() return value, only checked by the assert */
+ if (_jit->user_code) return;
+ result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_WRITE);
+ assert (result == 0);
+#endif
+}
+
void
_jit_frame(jit_state_t *_jit, jit_int32_t frame)
{
* to jump to unknown location. */
/* Treat all callee save as live. */
jit_regset_ior(live, live, mask);
+ /* Prevent registers explicitly set as live from
+ * being used as a temporary for the jmpi. */
+ jit_regset_ior(live, live, &_jitc->explive);
/* Treat anything else as dead. */
return;
}
if ((jump = node->link)) {
for (; jump; jump = link) {
link = jump->link;
- jump->u.n = prev;
+ if (jump->code == jit_code_movi)
+ jump->v.n = prev;
+ else
+ jump->u.n = prev;
jump->link = prev->link;
prev->link = jump;
}
if ((jump = next->link)) {
for (; jump; jump = link) {
link = jump->link;
- jump->u.n = node;
+ if (jump->code == jit_code_movi)
+ jump->v.n = node;
+ else
+ jump->u.n = node;
jump->link = node->link;
node->link = jump;
}
}
break;
case jit_code_name: case jit_code_note:
- case jit_code_align:
break;
default:
return (0);
case jit_code_bgti_f: return (jit_code_bunlei_f);
case jit_code_bner_f: return (jit_code_beqr_f);
- case jit_code_bnei_f: return (jit_code_beqr_f);
+ case jit_code_bnei_f: return (jit_code_beqi_f);
case jit_code_bunltr_f: return (jit_code_bger_f);
case jit_code_bunlti_f: return (jit_code_bgei_f);
generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
#endif
+#define patch_alist(revert) _patch_alist(_jit, revert)
+static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert);
+
#if defined(__i386__) || defined(__x86_64__)
# include "jit_x86.c"
#elif defined(__mips__)
jit_unget_reg(reg);
}
#endif
+/* Adjust the immediate offset of every node linked on the current
+ * function's alist by jit_diffsize(). Loads (ldxi_*) keep the offset
+ * in w.w and stores (stxi_*) keep it in u.w; diff is subtracted from
+ * it. When revert is true the sign of diff is flipped, which undoes a
+ * previous non-revert call made with the same jit_diffsize() value.
+ * Any other node code found on the list aborts. Only built when the
+ * backend defines stack_framesize. */
+#if defined(stack_framesize)
+static maybe_unused void
+_patch_alist(jit_state_t *_jit, jit_bool_t revert)
+{
+ jit_int32_t diff; /* amount subtracted from each linked offset */
+ jit_node_t *node; /* cursor over function->alist, chained by link */
+ diff = jit_diffsize();
+ if (diff) {
+ if (revert)
+ diff = -diff;
+ for (node = _jitc->function->alist; node; node = node->link) {
+ switch (node->code) {
+ case jit_code_ldxi_c: case jit_code_ldxi_uc:
+ case jit_code_ldxi_s: case jit_code_ldxi_us:
+ case jit_code_ldxi_i:
+#if __WORDSIZE == 64
+ case jit_code_ldxi_ui: case jit_code_ldxi_l:
+#endif
+ case jit_code_ldxi_f: case jit_code_ldxi_d:
+ node->w.w -= diff; /* load: offset is the third operand */
+ break;
+ case jit_code_stxi_c: case jit_code_stxi_s:
+ case jit_code_stxi_i:
+#if __WORDSIZE == 64
+ case jit_code_stxi_l:
+#endif
+ case jit_code_stxi_f: case jit_code_stxi_d:
+ node->u.w -= diff; /* store: offset is the first operand */
+ break;
+ default:
+ abort(); /* only frame loads/stores are expected on the list */
+ }
+ }
+ }
+}
+#endif
# else
fprintf(fp, "#if !defined(__ARM_PCS_VFP)\n");
# endif
-#elif defined(__mips__)
-# if __WORDSIZE == 32
-# if NEW_ABI
- fprintf(fp, "#if NEW_ABI\n");
-# else
- fprintf(fp, "#if !NEW_ABI\n");
-# endif
-# endif
#elif defined(__powerpc__)
fprintf(fp, "#if defined(__powerpc__)\n");
fprintf(fp, "#if __BYTE_ORDER == %s\n",
fprintf(fp, " %d, /* %s */\n", _szs[offset], code_name[offset]);
#if defined(__arm__)
fprintf(fp, "#endif /* __ARM_PCS_VFP */\n");
-#elif defined(__mips__)
-# if __WORDSIZE == 32
- fprintf(fp, "#endif /* NEW_ABI */\n");
-# endif
#elif defined(__powerpc__)
# if __WORDSIZE == 32
fprintf(fp, "#endif /* "