From: notaz Date: Sun, 9 Jul 2023 14:35:24 +0000 (+0300) Subject: Merge pull request #718 from pcercuei/update-lightrec-20230224 X-Git-Tag: r24l~320 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=02a5662c31c401081716623cc80bb1c4ab1dbb19;hp=e57621e0f476eaec8770927da94f3eea8304341f;p=pcsx_rearmed.git Merge pull request #718 from pcercuei/update-lightrec-20230224 Update lightrec 20230224 --- diff --git a/deps/lightning/.gitignore b/deps/lightning/.gitignore index 6fc5bf95..bc7e9712 100644 --- a/deps/lightning/.gitignore +++ b/deps/lightning/.gitignore @@ -1,3 +1,4 @@ +/build-aux +* *.o diff --git a/deps/lightning/.gitrepo b/deps/lightning/.gitrepo index 6cc08780..17edd68d 100644 --- a/deps/lightning/.gitrepo +++ b/deps/lightning/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/pcercuei/gnu_lightning.git branch = pcsx_rearmed - commit = b910a469a9bea63056eb53430dea4c7b56e447a8 - parent = 13b02197fcb7575646408094d5583ed7391b1153 + commit = b1983e9036d35933ffa773d81b61eedbf3ae3b93 + parent = 638335fabe3ba77b2a5c624a4c4aec52c18488f7 method = merge cmdver = 0.4.3 diff --git a/deps/lightning/ChangeLog b/deps/lightning/ChangeLog index 40ade7a2..2cd52735 100644 --- a/deps/lightning/ChangeLog +++ b/deps/lightning/ChangeLog @@ -1,3 +1,190 @@ +2023-02-23 Paulo Andrade + + * include/lightning/jit_private.h: Add new 'inst' field to + jit_compiler_t, if __mips__ is defined. This field is a simple + helper for a pending instruction to be emitted, and that can + be emitted out of order. + * lib/jit_fallback.c: Update for changes in internal mips patching + and jumping macros and function calls. + * lib/jit_mips-cpu.c: Core of changes to attempt to fill delay + slots with instructions that can be emitted out of order. + * lib/jit_mips-fpu.c: Update to use delay slot in branches. + * lib/jit_mips.c: Update for new delay slot use logic. 
+ +2023-02-20 Paulo Andrade + + * check/float.tst: Add conditionals for mips release for expected + NaN truncated to an integer. + * check/lightning.c: Add extra preprocessor for mips release. + * include/lightning/jit_mips.h: Make the NEW_ABI preprocessor + defined to zero if using the n32 or n64 abis. This makes it + easier to create runtime checks with an always true or false + condition. + * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c: Implement mips release + 6 support. + * lib/jit_mips.c: Add more reliable mips release detection code. + +2023-02-09 Paulo Andrade + + * check/Makefile.am: Update for new bit.tst test, to check the + new clor, clzr, ctor and ctzr instructions. + * check/all.tst: Update to verify encoding of new instructions. + * check/lightning.c: Update to have the lightning "assembler" + understanding the new instructions. + * include/lightning.h.in: Define new codes for new instructions. + * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c, + lib/jit_ia64.c, lib/jit_loongarch.c, lib/jit_mips.c, lib/jit_ppc.c, + lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c: + Implement fallback version of new instructions. + * lib/jit_fallback.c: Actual implementation of the fallbacks of + the new instructions. + * lib/jit_names.c: Update to print debug information of new + instructions. + +2023-01-26 Paulo Andrade + + * check/riprel.c, check/riprel.ok: New check files. + * check/Makefile.am: Support for new riprel test. + * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86.c: Implement + %rip relative addressing when reliable. Currently disabled for + x32 and _WIN32; could be added for positive relative addresses + only where it should work. + * lib/lightning.c: Correct problem added in previous patch due + to not testing on a 32 bit environment. 
+ +2023-01-23 Paulo Andrade + + * lib/jit_mips-cpu.c: Use pseudo instructions + "b" (BEQ(0,0,disp)) and "bal" (BGEZAL(0,disp)) for mips2, when an + unconditional branch or function call is known to be in range of a + relative jump. This should significantly reduce generated jit size. + +2023-01-20 Paulo Andrade + + * lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_rewind.c: Adapt + code to implement a variable framesize and optimize frame pointer + for simple leaf functions. + +2023-01-19 Paulo Andrade + + * lib/jit_riscv.c, lib/jit_riscv-cpu.c: Adapt code to use a + variable framesize. Previously it was aligning the stack at + 8 bytes, not 16. Now functions are called with a 16 byte aligned + stack. + +2023-01-18 Paulo Andrade + + * include/lightning/jit_private.h: Include new framesize field + of jit_compiler_t; add new alist field for jit_function_t; add + new cvt_offset and need_stack fields specific to x86. + * lib/jit_x86.c, lib/jit_x86-cpu.c: Rewrite code to create stack + frames, so that less stack space can be used if no, or very few + callee save registers are modified in a function. + * lib/jit_x86-sse.c, lib/jit_x86-x87.c: Make CVT_OFFSET variable, and + dynamically allocated; this is required to avoid needing to + modify %rsp twice at function prologs, even if no stack space + is used. + +2022-11-09 Paulo Andrade + + * configure.ac: Add new --enable-devel-strong-type-checking + option. + * include/lightning.h.in: Rework to not need to know if + PACKED_STACK is defined, and add a new argument to _jit_arg, + _jit_putarg{r,i}, _jit_pusharg{r,i} and _jit_ret{r,i} to have + the same code path if PACKED_STACK is defined or not, and also + to implement STRONG_TYPE_CHECK enabled with the new + --enable-devel-strong-type-checking. + * include/lightning/jit_private.h: Add new macros to add assertions + for STRONG_TYPE_CHECK and avoid pasting tokens in jit_inc_synth* + when the token is not a static known value. 
+ * lib/jit_aarch64.c: The first implementation of the new code, + working correctly in Apple M1 and with and without STRONG_TYPE_CHECK + in Linux. + +2022-11-08 Paulo Andrade + + Add support for packed stack arguments as used by Apple M1 + aarch64 cpus. This requires a major redesign in how Lightning + works, because contrary to all other supported ports, in this + case arguments must be truncated and sign/zero extended if + passed in registers, but when receiving the argument, there + is no need to truncate and sign/zero extend. + Return values are also treated this way. The callee must + truncate and sign/zero extend, not the caller. + * check/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS. + * check/all.tst: Implement paired arg/getarg/pusharg/putarg/ret + codes to validate they do not generate assertions. + * check/allocar.tst, check/call.tst, check/fib.tst, check/put.tst, + check/stack.tst: Update to pass in all build types. + * check/lightning.c: Add new codes for extra codes to handle + packed stack. + * configure.ac: Add a preprocessor define to know if a packed + stack is required. This is not really used, as it was moved to + jit_aarch64.h. + * doc/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS. + * doc/rpn.c: Update to pass in all build types. + * include/lightning.h.in: Add new codes and reorder enum. + * include/lightning/jit_aarch64.h: Detect condition of needing + a packed stack. + * lib/jit_aarch64-sz.c: Regenerate. + * lib/jit_aarch64.c: Major updates for packed stack. + * lib/jit_names.c: Updates for debug output. + * lib/lightning.c: Update for new codes. + +2022-10-31 Marc Nieper-Wißkirchen + + Add new skip instruction. + * .gitignore: Update from Gnulib. + * check/Makefile.am: Add tests. + * check/lightning.c: Handle skip instructions. + * check/protect.c: Rewrite with skip. + * check/skip.ok: New test. + * check/skip.tst: New test. + * doc/body.texi: Document the skip instruction. + * include/lightning.h.in: Add the skip instruction. 
+ * lib/jit_aarch64-sz.c: Update for skip instruction. + * lib/jit_aarch64.c: Implement skip instruction. + * lib/jit_alpha-sz.c: Update for skip instruction. + * lib/jit_alpha.c: Implement skip instruction. + * lib/jit_arm-sz.c: Update for skip instruction. + * lib/jit_arm.c: Implement skip instruction. + * lib/jit_hppa-sz.c: Update for skip instruction. + * lib/jit_hppa.c: Implement skip instruction. + * lib/jit_ia64-sz.c: Update for skip instruction. + * lib/jit_ia64.c: Implement skip instruction. + * lib/jit_loongarch-sz.c: Update for skip instruction. + * lib/jit_loongarch.c: Implement skip instruction. + * lib/jit_mips-sz.c: Update for skip instruction. + * lib/jit_mips.c: Implement skip instruction. + * lib/jit_names.c: Update for skip instruction. + * lib/jit_ppc-sz.c: Update for skip instruction. + * lib/jit_ppc.c: Implement skip instruction. + * lib/jit_riscv-sz.c: Update for skip instruction. + * lib/jit_riscv.c: Implement skip instruction. + * lib/jit_s390-sz.c: Update for skip instruction. + * lib/jit_s390.c: Implement skip instruction. + * lib/jit_size.c: Treat align and skip in a special way. + * lib/jit_sparc-sz.c: Update for skip instruction. + * lib/jit_sparc.c: Implement skip instruction. + * lib/jit_x86-sz.c: Update for skip instruction. + * lib/jit_x86.c: Implement skip instruction. + * lib/lightning.c: Classify skip instruction. + +2022-10-30 Marc Nieper-Wißkirchen + + Add user-visible functions jit_protect and jit_unprotect. + * check/Makefile.am: Add test for jit_protect and jit_unprotect. + * check/protect.c: New test. + * doc/body.texi: Add documentation for jit_protect and + jit_unprotect. + * include/lightning.h.in: Add prototypes for jit_protect and + jit_unprotect. + * include/lightning/jit_private.h: Add a field to store the size + of the protected memory. + * lib/lightning.c: Remember the size of the protected memory and + implement the two new functions. 
+ 2022-10-12 Paulo Andrade * include/lightning/jit_loongarch.h, lib/jit_loongarch-cpu.c, diff --git a/deps/lightning/Makefile.am b/deps/lightning/Makefile.am index 112deaed..8dbbaef7 100644 --- a/deps/lightning/Makefile.am +++ b/deps/lightning/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc. # # This file is part of GNU lightning. # diff --git a/deps/lightning/THANKS b/deps/lightning/THANKS index 0e0f1a94..d5737afb 100644 --- a/deps/lightning/THANKS +++ b/deps/lightning/THANKS @@ -19,3 +19,4 @@ Holger Hans Peter Freyther Jon Arintok Bruno Haible Marc Nieper-Wißkirchen +Paul Cercueil diff --git a/deps/lightning/TODO b/deps/lightning/TODO index 676af029..8b137891 100644 --- a/deps/lightning/TODO +++ b/deps/lightning/TODO @@ -1,28 +1 @@ - * Validate that divrem in jit_x86-cpu.c is not modifying - the non result arguments. This is not verified by clobber.tst, - as it only checks registers not involved in the operation - (because it does not know about values being set as input - for the the operation). - * Write a simple higher level language implementation generating - jit with lightning, that could be some lisp or C like language. - - * rerun ./configure --enable-devel-get-jit-size and regenerate - the related jit_$arch-sz.c for the ports where nodata is - meaningful: - hppa (done) - i586 (done) - ia64 - mips o32 (done) - mips n32 - mips n64 - powerpc 32 (done) - powerpc 64 (done) - ppc - s390x (done) - sparc (done) - x86_64 (done) - Missing ones are due to no longer (remote) access to such hosts - and may be broken with jit_set_data(..., JIT_DISABLE_DATA). - (ia64 hp-ux or linx), (irix mips for 32 or 64 abi), and - (darwin ppc). 
diff --git a/deps/lightning/check/Makefile.am b/deps/lightning/check/Makefile.am index 10537b1f..c77f5cd3 100644 --- a/deps/lightning/check/Makefile.am +++ b/deps/lightning/check/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2012-2022 Free Software Foundation, Inc. +# Copyright 2012-2023 Free Software Foundation, Inc. # # This file is part of GNU lightning. # @@ -14,10 +14,11 @@ # License for more details. # -AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE +AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ + -D_GNU_SOURCE $(LIGHTNING_CFLAGS) check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \ - catomic + catomic protect riprel lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) lightning_SOURCES = lightning.c @@ -46,6 +47,12 @@ cva_list_SOURCES = cva_list.c catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB) catomic_SOURCES = catomic.c +protect_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) +protect_SOURCES = protect.c + +riprel_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB) +riprel_SOURCES = riprel.c + $(top_builddir)/lib/liblightning.la: cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la @@ -105,8 +112,10 @@ EXTRA_DIST = \ range.tst range.ok \ ranger.tst ranger.ok \ ret.tst ret.ok \ + skip.tst skip.ok \ tramp.tst tramp.ok \ va_list.tst va_list.ok \ + bit.tst bit.ok \ check.sh \ check.x87.sh \ check.arm.sh check.swf.sh \ @@ -114,7 +123,8 @@ EXTRA_DIST = \ check.arm4.swf.sh \ check.nodata.sh \ check.x87.nodata.sh \ - run-test all.tst + run-test all.tst \ + collatz.tst factorial.tst base_TESTS = \ 3to2 add align allocai \ @@ -135,8 +145,8 @@ base_TESTS = \ clobber carry call \ float jmpr live put \ qalu_mul qalu_div \ - range ranger ret tramp \ - va_list + range ranger ret skip tramp \ + va_list bit $(base_TESTS): check.sh $(LN_S) $(srcdir)/check.sh $@ @@ -317,13 +327,14 @@ nodata_TESTS = \ clobber.nodata carry.nodata call.nodata \ 
float.nodata jmpr.nodata tramp.nodata \ range.nodata ranger.nodata put.nodata \ - va_list.nodata + va_list.nodata bit.nodata $(nodata_TESTS): check.nodata.sh $(LN_S) $(srcdir)/check.nodata.sh $@ TESTS += $(nodata_TESTS) endif -TESTS += ccall self setcode nodata ctramp carg cva_list catomic +TESTS += ccall self setcode nodata ctramp carg cva_list catomic \ + protect riprel CLEANFILES = $(TESTS) #TESTS_ENVIRONMENT=$(srcdir)/run-test; diff --git a/deps/lightning/check/all.tst b/deps/lightning/check/all.tst index ac4fc975..d24f7ae0 100644 --- a/deps/lightning/check/all.tst +++ b/deps/lightning/check/all.tst @@ -2,15 +2,16 @@ .code prolog allocai 32 $buf - arg $c - arg $uc - arg $s - arg $us - arg $i + arg_c $c + arg_c $uc + arg_s $s + arg_s $us + arg_i $i + arg_i $ui #if __WORDSIZE == 64 - arg $ui - arg $l + arg_l $l #endif + arg $a getarg_c %r0 $c getarg_uc %r0 $uc getarg_s %r0 $s @@ -20,6 +21,25 @@ getarg_ui %r0 $ui getarg_l %r0 $l #endif + getarg %r0 $a + putargr_c %r0 $c + putargi_c 1 $c + putargr_uc %r0 $uc + putargi_uc 1 $uc + putargr_s %r0 $s + putargi_s 1 $s + putargr_us %r0 $us + putargi_us 1 $us + putargr_i %r0 $i + putargi_i 1 $ui +#if __WORDSIZE == 64 + putargr_ui %r0 $ui + putargi_ui 1 $ui + putargr_l %r0 $l + putargi_l 1 $l +#endif + putargr %r0 $a + putargi 1 $a addr %r0 %r1 %r2 addi %r0 %r1 2 addcr %r0 %r1 %r2 @@ -64,6 +84,10 @@ rshi_u %r0 %r1 2 negr %r0 %r1 comr %r0 %r1 + clor %r0 %r1 + clzr %r0 %r1 + ctor %r0 %r1 + ctzr %r0 %r1 ltr %r0 %r1 %r2 lti %r0 %r1 2 ltr_u %r0 %r1 %r2 @@ -205,6 +229,15 @@ label: callr %r0 calli label prepare + pushargr_c %r0 + pushargr_uc %r0 + pushargr_s %r0 + pushargr_us %r0 + pushargr_i %r0 +#if __WORDSIZE == 64 + pushargr_ui %r0 + pushargr_l %r0 +#endif pushargr %r0 finishr %r0 prepare @@ -212,6 +245,15 @@ label: ellipsis finishi 0x80000000 ret + retr_c %r1 + retr_uc %r1 + retr_s %r1 + retr_us %r1 + retr_i %r1 +#if __WORDSIZE == 64 + retr_ui %r1 + retr_l %r1 +#endif retr %r1 reti 2 retval_c %r1 @@ -225,6 +267,8 @@ label: 
#endif arg_f $f getarg_f %f1 $f + putargr_f %f1 $f + putargi_f 1.0 $f addr_f %f0 %f1 %f2 addi_f %f0 %f1 0.5 subr_f %f0 %f1 %f2 @@ -323,6 +367,8 @@ unordi: retval_f %f1 arg_d $f getarg_d %f1 $f + putargr_d %f1 $f + putargi_d 1.0 $f addr_d %f0 %f1 %f2 addi_d %f0 %f1 0.5 subr_d %f0 %f1 %f2 diff --git a/deps/lightning/check/allocar.tst b/deps/lightning/check/allocar.tst index e3ee0109..1bffef87 100644 --- a/deps/lightning/check/allocar.tst +++ b/deps/lightning/check/allocar.tst @@ -55,7 +55,7 @@ fill##T##done: \ #define fill_us fill_s #define fill_ui fill_i -#define ARG( T, N) arg $arg##T##N +#define ARG( T, N) arg##T $arg##T##N #define ARGF( T, N) arg##T $arg##T##N #define ARG1( K, T) ARG##K(T, 0) #define ARG2( K, T) ARG1( K, T) ARG##K(T, 1) @@ -74,56 +74,56 @@ fill##T##done: \ #define ARG15(K, T) ARG14(K, T) ARG##K(T, 14) #define ARG16(K, T) ARG15(K, T) ARG##K(T, 15) #define ARG_c(N) ARG##N( , _c) -#define ARG_uc(N) ARG##N( , _uc) +#define ARG_uc(N) ARG##N( , _c) #define ARG_s(N) ARG##N( , _s) -#define ARG_us(N) ARG##N( , _us) +#define ARG_us(N) ARG##N( , _s) #define ARG_i(N) ARG##N( , _i) -#define ARG_ui(N) ARG##N( , _ui) +#define ARG_ui(N) ARG##N( , _i) #define ARG_l(N) ARG##N( , _l) #define ARG_f(N) ARG##N(F, _f) #define ARG_d(N) ARG##N(F, _d) -#define CHK(N, T, V) \ - getarg %r0 $arg##T##V \ +#define CHK(N, T, TT, V) \ + getarg##T %r0 $arg##TT##V \ ldxi##T %r1 %v0 $(V * szof##T) \ beqr N##T##V %r0 %r1 \ calli @abort \ N##T##V: -#define CHKF(N, T, V) \ - getarg##T %f0 $arg##T##V \ +#define CHKF(N, T, TT, V) \ + getarg##T %f0 $arg##TT##V \ ldxi##T %f1 %v0 $(V * szof##T) \ beqr##T N##T##V %f0 %f1 \ calli @abort \ N##T##V: -#define GET1( K, N, T, V) CHK##K(N, T, 0) -#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1) -#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2) -#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3) -#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4) -#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5) 
-#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6) -#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7) -#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8) -#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9) -#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10) -#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11) -#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12) -#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13) -#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14) -#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15) +#define GET1( K, N, T, TT, V) CHK##K(N, T, TT, 0) +#define GET2( K, N, T, TT, V) GET1( K, N, T, TT, V) CHK##K(N, T, TT, 1) +#define GET3( K, N, T, TT, V) GET2( K, N, T, TT, V) CHK##K(N, T, TT, 2) +#define GET4( K, N, T, TT, V) GET3( K, N, T, TT, V) CHK##K(N, T, TT, 3) +#define GET5( K, N, T, TT, V) GET4( K, N, T, TT, V) CHK##K(N, T, TT, 4) +#define GET6( K, N, T, TT, V) GET5( K, N, T, TT, V) CHK##K(N, T, TT, 5) +#define GET7( K, N, T, TT, V) GET6( K, N, T, TT, V) CHK##K(N, T, TT, 6) +#define GET8( K, N, T, TT, V) GET7( K, N, T, TT, V) CHK##K(N, T, TT, 7) +#define GET9( K, N, T, TT, V) GET8( K, N, T, TT, V) CHK##K(N, T, TT, 8) +#define GET10(K, N, T, TT, V) GET9( K, N, T, TT, V) CHK##K(N, T, TT, 9) +#define GET11(K, N, T, TT, V) GET10(K, N, T, TT, V) CHK##K(N, T, TT, 10) +#define GET12(K, N, T, TT, V) GET11(K, N, T, TT, V) CHK##K(N, T, TT, 11) +#define GET13(K, N, T, TT, V) GET12(K, N, T, TT, V) CHK##K(N, T, TT, 12) +#define GET14(K, N, T, TT, V) GET13(K, N, T, TT, V) CHK##K(N, T, TT, 13) +#define GET15(K, N, T, TT, V) GET14(K, N, T, TT, V) CHK##K(N, T, TT, 14) +#define GET16(K, N, T, TT, V) GET15(K, N, T, TT, V) CHK##K(N, T, TT, 15) -#define GET_c(N, M) GET##N( , c##N, _c, M) -#define GET_uc(N, M) GET##N( , uc##N, _uc, M) -#define GET_s(N, M) GET##N( , s##N, _s, M) -#define GET_us(N, M) GET##N( , us##N, _us, M) -#define GET_i(N, M) GET##N( , i##N, _i, M) 
-#define GET_ui(N, M) GET##N( , ui##N, _ui, M) -#define GET_l(N, M) GET##N( , l##N, _l, M) -#define GET_f(N, M) GET##N(F, f##N, _f, M) -#define GET_d(N, M) GET##N(F, d##N, _d, M) +#define GET_c(N, M) GET##N( , c##N, _c, _c, M) +#define GET_uc(N, M) GET##N( , uc##N, _uc, _c, M) +#define GET_s(N, M) GET##N( , s##N, _s, _s, M) +#define GET_us(N, M) GET##N( , us##N, _us, _s, M) +#define GET_i(N, M) GET##N( , i##N, _i, _i, M) +#define GET_ui(N, M) GET##N( , ui##N, _ui, _i, M) +#define GET_l(N, M) GET##N( , l##N, _l, _l, M) +#define GET_f(N, M) GET##N(F, f##N, _f, _f, M) +#define GET_d(N, M) GET##N(F, d##N, _d, _d, M) -#define PUSH( T, V) pushargi V +#define PUSH( T, V) pushargi##T V #define PUSHF( T, V) pushargi##T V #define PUSH0( K, T) /**/ #define PUSH1( K, T) PUSH##K(T, 0) @@ -161,14 +161,14 @@ test##T##_0: \ ret \ epilog -#define DEFN(N, M, T) \ +#define DEFN(N, M, T, TT) \ name test##T##_##N \ test##T##_##N: \ prolog \ arg $argp \ /* stack buffer in %v0 */ \ getarg %v0 $argp \ - ARG##T(N) \ + ARG##TT(N) \ /* validate arguments */ \ GET##T(N, M) \ /* heap buffer in %v1 */ \ @@ -260,24 +260,24 @@ test##T##_17_done: \ ret \ epilog -#define DEF( T) \ +#define DEF( T, TT) \ DEF0( T) \ - DEFN( 1, 0, T) \ - DEFN( 2, 1, T) \ - DEFN( 3, 2, T) \ - DEFN( 4, 3, T) \ - DEFN( 5, 4, T) \ - DEFN( 6, 5, T) \ - DEFN( 7, 6, T) \ - DEFN( 8, 7, T) \ - DEFN( 9, 8, T) \ - DEFN(10, 9, T) \ - DEFN(11, 10, T) \ - DEFN(12, 11, T) \ - DEFN(13, 12, T) \ - DEFN(14, 13, T) \ - DEFN(15, 14, T) \ - DEFN(16, 15, T) \ + DEFN( 1, 0, T, TT) \ + DEFN( 2, 1, T, TT) \ + DEFN( 3, 2, T, TT) \ + DEFN( 4, 3, T, TT) \ + DEFN( 5, 4, T, TT) \ + DEFN( 6, 5, T, TT) \ + DEFN( 7, 6, T, TT) \ + DEFN( 8, 7, T, TT) \ + DEFN( 9, 8, T, TT) \ + DEFN(10, 9, T, TT) \ + DEFN(11, 10, T, TT) \ + DEFN(12, 11, T, TT) \ + DEFN(13, 12, T, TT) \ + DEFN(14, 13, T, TT) \ + DEFN(15, 14, T, TT) \ + DEFN(16, 15, T, TT) \ DEFX(T) #define CALL(T) calli test##T##_17 @@ -323,17 +323,17 @@ memcpy_done: FILLF(_f) FILLF(_d) - DEF(_c) - 
DEF(_uc) - DEF(_s) - DEF(_us) - DEF(_i) + DEF(_c, _c) + DEF(_uc, _c) + DEF(_s, _s) + DEF(_us, _s) + DEF(_i, _i) #if __WORDSIZE == 64 - DEF(_ui) - DEF(_l) + DEF(_ui, _i) + DEF(_l, _l) #endif - DEF(_f) - DEF(_d) + DEF(_f, _f) + DEF(_d, _d) name main main: diff --git a/deps/lightning/check/bit.ok b/deps/lightning/check/bit.ok new file mode 100644 index 00000000..9766475a --- /dev/null +++ b/deps/lightning/check/bit.ok @@ -0,0 +1 @@ +ok diff --git a/deps/lightning/check/bit.tst b/deps/lightning/check/bit.tst new file mode 100644 index 00000000..b721d5c9 --- /dev/null +++ b/deps/lightning/check/bit.tst @@ -0,0 +1,881 @@ +/* If the fallback clor, clzr, ctor and ctzr are used, it might be better + * to implement it as functions, as inlined it is almost as large as a + * function. + * Below is an example of how to do it. + */ + +.data 4096 +str_clo: +.c "clo" +str_clz: +.c "clz" +str_cto: +.c "cto" +str_ctz: +.c "ctz" +print_fmt: +#if __WORDSIZE == 64 +.c "%s (0x%016lx) %s = %d\n" +#else +.c "%s (0x%08lx) %s = %d\n" +#endif +ok: +.c "ok\n" + +#define BIT2(OP, ARG, RES, R0, R1) \ + movi %R1 ARG \ + OP##r %R0 %R1 \ + beqi OP##R0##R1##ARG %R0 RES \ + calli @abort \ +OP##R0##R1##ARG: + +#define BIT1(OP, ARG, RES, V0, V1, V2, R0, R1, R2) \ + BIT2(OP, ARG, RES, V0, V0) \ + BIT2(OP, ARG, RES, V0, V1) \ + BIT2(OP, ARG, RES, V0, V2) \ + BIT2(OP, ARG, RES, V0, R0) \ + BIT2(OP, ARG, RES, V0, R1) \ + BIT2(OP, ARG, RES, V0, R2) + +#define BIT(OP, ARG, RES, V0, V1, V2, R0, R1, R2) \ + BIT1(OP, ARG, RES, V1, V2, R0, R1, R2, V0) \ + BIT1(OP, ARG, RES, V2, R0, R1, R2, V0, V1) \ + BIT1(OP, ARG, RES, R0, R1, R2, V0, V1, V2) \ + BIT1(OP, ARG, RES, R1, R2, V0, V1, V2, R0) \ + BIT1(OP, ARG, RES, R2, V0, V1, V2, R0, R1) + +#define CLO(ARG, RES) \ + BIT(clo, ARG, RES, v0, v1, v2, r0, r1, r2) +#define CLZ(ARG, RES) \ + BIT(clz, ARG, RES, v0, v1, v2, r0, r1, r2) +#define CTO(ARG, RES) \ + BIT(cto, ARG, RES, v0, v1, v2, r0, r1, r2) +#define CTZ(ARG, RES) \ + BIT(ctz, ARG, RES, v0, v1, v2, r0, r1, 
r2) + +.code + jmpi main +/* + jit_uword_t cto(jit_uword_t r0) { + r0 = ~r0; + if (r0 == 0) + r0 = __WORDSIZE; + else + r0 = ctz(r0); + return r0; + } + */ +name cto +cto: + prolog + arg $in + getarg %r0 $in + comr %r0 %r0 + bnei do_cto %r0 0 + movi %r0 __WORDSIZE + jmpi done_cto +do_cto: + prepare + pushargr %r0 + finishi ctz + retval %r0 +done_cto: + retr %r0 + epilog + +/* + jit_uword_t clo(jit_uword_t r0) { + r0 = ~r0; + if (r0 == 0) + r0 = __WORDSIZE; + else + r0 = clz(r0); + return r0; + } + */ +name clo +clo: + prolog + arg $in + getarg %r0 $in + comr %r0 %r0 + bnei do_clo %r0 0 + movi %r0 __WORDSIZE + jmpi done_clo +do_clo: + prepare + pushargr %r0 + finishi clz + retval %r0 +done_clo: + retr %r0 + epilog + +/* + jit_uword_t clz(jit_word_t r1) { + jit_uword_t r0, r2; + if (r1 == 0) + r0 = __WORDSIZE; + else { + r0 = 0; + #if __WORDSIZE == 64 + r2 = 0xffffffff00000000UL; + if (!(r1 & r2)) { + r1 <<= 32; + r0 += 32; + } + r2 <<= 16; + #else + r2 = 0xffff0000UL; + #endif + if (!(r1 & r2)) { + r1 <<= 16; + r0 += 16; + } + r2 <<= 8; + if (!(r1 & r2)) { + r1 <<= 8; + r0 += 8; + } + r2 <<= 4; + if (!(r1 & r2)) { + r1 <<= 4; + r0 += 4; + } + r2 <<= 2; + if (!(r1 & r2)) { + r1 <<= 2; + r0 += 2; + } + r2 <<= 1; + if (!(r1 & r2)) + r0 += 1; + } + return r0; + } + */ +name clz +clz: + prolog + arg $in + getarg %r1 $in + bnei lun %r1 0 + reti __WORDSIZE +lun: + movi %r0 0 +#if __WORDSIZE == 64 + movi %r2 0xffffffff00000000 + bmsr l32 %r1 %r2 + lshi %r1 %r1 32 + addi %r0 %r0 32 +l32: + lshi %r2 %r2 16 +#else + movi %r2 0xffff0000 +#endif + bmsr l16 %r1 %r2 + lshi %r1 %r1 16 + addi %r0 %r0 16 +l16: + lshi %r2 %r2 8 + bmsr l8 %r1 %r2 + lshi %r1 %r1 8 + addi %r0 %r0 8 +l8: + lshi %r2 %r2 4 + bmsr l4 %r1 %r2 + lshi %r1 %r1 4 + addi %r0 %r0 4 +l4: + lshi %r2 %r2 2 + bmsr l2 %r1 %r2 + lshi %r1 %r1 2 + addi %r0 %r0 2 +l2: + lshi %r2 %r2 1 + bmsr l1 %r1 %r2 + addi %r0 %r0 1 +l1: + retr %r0 + epilog + +/* + jit_uword_t ctz(jit_uword_t r1) { + jit_uword_t r0, r2; + if (r1 == 0) + 
r0 = __WORDSIZE; + else { + r0 = 0; + #if __WORDSIZE == 64 + r2 = 0xffffffffUL;; + if (!(r1 & r2)) { + r1 >>= 32; + r0 += 32; + } + r2 >>= 16; + #else + r2 = 0xffffUL;; + #endif + if (!(r1 & r2)) { + r1 >>= 16; + r0 += 16; + } + r2 >>= 8; + if (!(r1 & r2)) { + r1 >>= 8; + r0 += 8; + } + r2 >>= 4; + if (!(r1 & r2)) { + r1 >>= 4; + r0 += 4; + } + r2 >>= 2; + if (!(r1 & r2)) { + r1 >>= 2; + r0 += 2; + } + r2 >>= 1; + if (!(r1 & r2)) + r0 += 1; + } + return r0; + } +*/ +name ctz +ctz: + prolog + arg $in + getarg %r1 $in + bnei tun %r1 0 + reti __WORDSIZE +tun: +#if __WORDSIZE == 64 + movi %r0 0 + movi %r2 0xffffffff + bmsr t32 %r1 %r2 + rshi_u %r1 %r1 32 + addi %r0 %r0 32 +t32: + rshi %r2 %r2 16 +#else + movi %r2 0xffff +#endif + bmsr t16 %r1 %r2 + rshi_u %r1 %r1 16 + addi %r0 %r0 16 +t16: + rshi %r2 %r2 8 + bmsr t8 %r1 %r2 + rshi_u %r1 %r1 8 + addi %r0 %r0 8 +t8: + rshi %r2 %r2 4 + bmsr t4 %r1 %r2 + rshi_u %r1 %r1 4 + addi %r0 %r0 4 +t4: + rshi %r2 %r2 2 + bmsr t2 %r1 %r2 + rshi_u %r1 %r1 2 + addi %r0 %r0 2 +t2: + rshi %r2 %r2 1 + bmsr t1 %r1 %r2 + addi %r0 %r0 1 +t1: + retr %r0 + epilog + +/* + char *bitsprint(char *v0, jit_uword_t v1) { + jit_uword_t r0, r1; + memset(v0, '0', __WORDSIZE); + v0[__WORDSIZE] = 0; + for (r0 = 1L << (__WORDSIZE - 1), r1 = 0; r0; r0 >>= 1, ++r1) { + if (v1 & r0) + v0[r1] = '1'; + } + return v0; + } + */ +name bitsprint +bitsprint: + prolog + arg $buf + arg $val + getarg %v0 $buf + getarg %v1 $val + prepare + pushargr %v0 + pushargi '0' + pushargi __WORDSIZE + finishi @memset + movi %r0 0 + addi %r1 %v0 __WORDSIZE + str_c %r1 %r0 + movi %r0 $(1 << (__WORDSIZE - 1)) + movi %r1 0 + movi %r2 '1' +bitloop: + bmcr bitzero %v1 %r0 + stxr_c %r1 %v0 %r2 +bitzero: + addi %r1 %r1 1 + rshi_u %r0 %r0 1 + bnei bitloop %r0 0 + retr %v0 + epilog + +/* + #if 0 + int main(int argc, char *argv[]) { + jit_uword_t r0, v0, v1, v2; + char buf[80]; + #if __WORDSIZE == 64 + char *fmt = "%s (0x%016lx) %s = %d\n"; + v0 = 0x8000000000000000UL; + v2 = 
0xffffffffffffffffUL; + #else + char *fmt = "%s (0x%08lx) %s = %d\n"; + v0 = 0x80000000UL; + v2 = 0xffffffffUL; + #endif + do { + v1 = v0 - 1; + r0 = clz(v0); + bitsprint(buf, v0); + printf(fmt, "clz", v0, buf, r0); + r0 = clo(v2); + bitsprint(buf, v2); + printf(fmt, "clo", v2, buf, r0); + r0 = ctz(v0); + bitsprint(buf, v0); + printf(fmt, "ctz", v0, buf, r0); + r0 = cto(v1); + bitsprint(buf, v1); + printf(fmt, "cto", v1, buf, r0); + v0 >>= 1; + v2 <<= 1; + } while ((jit_word_t)v1 > -1); + return 0; + } + #endif + */ + +/* Make it "#if 1" for a "debug mode", that helps in regenerating tables, + * or temporary state while implementing optimized port specific versions. */ +#if 0 +#define CALL_FUNC 1 + name main +main: + prolog + allocai 80 $buf +#if __WORDSIZE == 64 + movi %v0 0x8000000000000000 + movi %v2 0xffffffffffffffff +#else + movi %v0 0x80000000 + movi %v2 0xffffffff +#endif +loop: + subi %v1 %v0 1 + addi %r1 %fp $buf + prepare + pushargr %r1 + pushargr %v0 + finishi bitsprint +#if CALL_FUNC + prepare + pushargr %v0 + finishi clz + retval %r0 +#else + clzr %r0 %v0 +#endif + addi %r1 %fp $buf + prepare + pushargi print_fmt + ellipsis + pushargi str_clz + pushargr %v0 + pushargr %r1 + pushargr %r0 + finishi @printf + addi %r1 %fp $buf + prepare + pushargr %r1 + pushargr %v2 + finishi bitsprint +#if CALL_FUNC + prepare + pushargr %v2 + finishi clo + retval %r0 +#else + clor %r0 %v2 +#endif + addi %r1 %fp $buf + prepare + pushargi print_fmt + ellipsis + pushargi str_clo + pushargr %v2 + pushargr %r1 + pushargr %r0 + finishi @printf + addi %r1 %fp $buf + prepare + pushargr %r1 + pushargr %v0 + finishi bitsprint +#if CALL_FUNC + prepare + pushargr %v0 + finishi ctz + retval %r0 +#else + ctzr %r0 %v0 +#endif + addi %r1 %fp $buf + prepare + pushargi print_fmt + ellipsis + pushargi str_ctz + pushargr %v0 + pushargr %r1 + pushargr %r0 + finishi @printf + addi %r1 %fp $buf + prepare + pushargr %r1 + pushargr %v1 + finishi bitsprint +#if CALL_FUNC + prepare + pushargr %v1 
+ finishi cto + retval %r0 +#else + ctor %r0 %v1 +#endif + addi %r1 %fp $buf + prepare + pushargi print_fmt + ellipsis + pushargi str_cto + pushargr %v1 + pushargr %r1 + pushargr %r0 + finishi @printf + rshi_u %v0 %v0 1 + lshi %v2 %v2 1 + bgti loop %v1 -1 + ret + epilog +#else + + name main +main: + prolog +#if __WORDSIZE == 32 + CLZ(0x80000000, 0) + CLO(0xffffffff, 32) + CTZ(0x80000000, 31) + CTO(0x7fffffff, 31) + CLZ(0x40000000, 1) + CLO(0xfffffffe, 31) + CTZ(0x40000000, 30) + CTO(0x3fffffff, 30) + CLZ(0x20000000, 2) + CLO(0xfffffffc, 30) + CTZ(0x20000000, 29) + CTO(0x1fffffff, 29) + CLZ(0x10000000, 3) + CLO(0xfffffff8, 29) + CTZ(0x10000000, 28) + CTO(0x0fffffff, 28) + CLZ(0x08000000, 4) + CLO(0xfffffff0, 28) + CTZ(0x08000000, 27) + CTO(0x07ffffff, 27) + CLZ(0x04000000, 5) + CLO(0xffffffe0, 27) + CTZ(0x04000000, 26) + CTO(0x03ffffff, 26) + CLZ(0x02000000, 6) + CLO(0xffffffc0, 26) + CTZ(0x02000000, 25) + CTO(0x01ffffff, 25) + CLZ(0x01000000, 7) + CLO(0xffffff80, 25) + CTZ(0x01000000, 24) + CTO(0x00ffffff, 24) + CLZ(0x00800000, 8) + CLO(0xffffff00, 24) + CTZ(0x00800000, 23) + CTO(0x007fffff, 23) + CLZ(0x00400000, 9) + CLO(0xfffffe00, 23) + CTZ(0x00400000, 22) + CTO(0x003fffff, 22) + CLZ(0x00200000, 10) + CLO(0xfffffc00, 22) + CTZ(0x00200000, 21) + CTO(0x001fffff, 21) + CLZ(0x00100000, 11) + CLO(0xfffff800, 21) + CTZ(0x00100000, 20) + CTO(0x000fffff, 20) + CLZ(0x00080000, 12) + CLO(0xfffff000, 20) + CTZ(0x00080000, 19) + CTO(0x0007ffff, 19) + CLZ(0x00040000, 13) + CLO(0xffffe000, 19) + CTZ(0x00040000, 18) + CTO(0x0003ffff, 18) + CLZ(0x00020000, 14) + CLO(0xffffc000, 18) + CTZ(0x00020000, 17) + CTO(0x0001ffff, 17) + CLZ(0x00010000, 15) + CLO(0xffff8000, 17) + CTZ(0x00010000, 16) + CTO(0x0000ffff, 16) + CLZ(0x00008000, 16) + CLO(0xffff0000, 16) + CTZ(0x00008000, 15) + CTO(0x00007fff, 15) + CLZ(0x00004000, 17) + CLO(0xfffe0000, 15) + CTZ(0x00004000, 14) + CTO(0x00003fff, 14) + CLZ(0x00002000, 18) + CLO(0xfffc0000, 14) + CTZ(0x00002000, 13) + CTO(0x00001fff, 13) + 
CLZ(0x00001000, 19) + CLO(0xfff80000, 13) + CTZ(0x00001000, 12) + CTO(0x00000fff, 12) + CLZ(0x00000800, 20) + CLO(0xfff00000, 12) + CTZ(0x00000800, 11) + CTO(0x000007ff, 11) + CLZ(0x00000400, 21) + CLO(0xffe00000, 11) + CTZ(0x00000400, 10) + CTO(0x000003ff, 10) + CLZ(0x00000200, 22) + CLO(0xffc00000, 10) + CTZ(0x00000200, 9) + CTO(0x000001ff, 9) + CLZ(0x00000100, 23) + CLO(0xff800000, 9) + CTZ(0x00000100, 8) + CTO(0x000000ff, 8) + CLZ(0x00000080, 24) + CLO(0xff000000, 8) + CTZ(0x00000080, 7) + CTO(0x0000007f, 7) + CLZ(0x00000040, 25) + CLO(0xfe000000, 7) + CTZ(0x00000040, 6) + CTO(0x0000003f, 6) + CLZ(0x00000020, 26) + CLO(0xfc000000, 6) + CTZ(0x00000020, 5) + CTO(0x0000001f, 5) + CLZ(0x00000010, 27) + CLO(0xf8000000, 5) + CTZ(0x00000010, 4) + CTO(0x0000000f, 4) + CLZ(0x00000008, 28) + CLO(0xf0000000, 4) + CTZ(0x00000008, 3) + CTO(0x00000007, 3) + CLZ(0x00000004, 29) + CLO(0xe0000000, 3) + CTZ(0x00000004, 2) + CTO(0x00000003, 2) + CLZ(0x00000002, 30) + CLO(0xc0000000, 2) + CTZ(0x00000002, 1) + CTO(0x00000001, 1) + CLZ(0x00000001, 31) + CLO(0x80000000, 1) + CTZ(0x00000001, 0) + CTO(0x00000000, 0) + CLZ(0x00000000, 32) + CLO(0x00000000, 0) + CTZ(0x00000000, 32) + CTO(0xffffffff, 32) +#else + CLZ(0x8000000000000000, 0) + CLO(0xffffffffffffffff, 64) + CTZ(0x8000000000000000, 63) + CTO(0x7fffffffffffffff, 63) + CLZ(0x4000000000000000, 1) + CLO(0xfffffffffffffffe, 63) + CTZ(0x4000000000000000, 62) + CTO(0x3fffffffffffffff, 62) + CLZ(0x2000000000000000, 2) + CLO(0xfffffffffffffffc, 62) + CTZ(0x2000000000000000, 61) + CTO(0x1fffffffffffffff, 61) + CLZ(0x1000000000000000, 3) + CLO(0xfffffffffffffff8, 61) + CTZ(0x1000000000000000, 60) + CTO(0x0fffffffffffffff, 60) + CLZ(0x0800000000000000, 4) + CLO(0xfffffffffffffff0, 60) + CTZ(0x0800000000000000, 59) + CTO(0x07ffffffffffffff, 59) + CLZ(0x0400000000000000, 5) + CLO(0xffffffffffffffe0, 59) + CTZ(0x0400000000000000, 58) + CTO(0x03ffffffffffffff, 58) + CLZ(0x0200000000000000, 6) + CLO(0xffffffffffffffc0, 58) + 
CTZ(0x0200000000000000, 57) + CTO(0x01ffffffffffffff, 57) + CLZ(0x0100000000000000, 7) + CLO(0xffffffffffffff80, 57) + CTZ(0x0100000000000000, 56) + CTO(0x00ffffffffffffff, 56) + CLZ(0x0080000000000000, 8) + CLO(0xffffffffffffff00, 56) + CTZ(0x0080000000000000, 55) + CTO(0x007fffffffffffff, 55) + CLZ(0x0040000000000000, 9) + CLO(0xfffffffffffffe00, 55) + CTZ(0x0040000000000000, 54) + CTO(0x003fffffffffffff, 54) + CLZ(0x0020000000000000, 10) + CLO(0xfffffffffffffc00, 54) + CTZ(0x0020000000000000, 53) + CTO(0x001fffffffffffff, 53) + CLZ(0x0010000000000000, 11) + CLO(0xfffffffffffff800, 53) + CTZ(0x0010000000000000, 52) + CTO(0x000fffffffffffff, 52) + CLZ(0x0008000000000000, 12) + CLO(0xfffffffffffff000, 52) + CTZ(0x0008000000000000, 51) + CTO(0x0007ffffffffffff, 51) + CLZ(0x0004000000000000, 13) + CLO(0xffffffffffffe000, 51) + CTZ(0x0004000000000000, 50) + CTO(0x0003ffffffffffff, 50) + CLZ(0x0002000000000000, 14) + CLO(0xffffffffffffc000, 50) + CTZ(0x0002000000000000, 49) + CTO(0x0001ffffffffffff, 49) + CLZ(0x0001000000000000, 15) + CLO(0xffffffffffff8000, 49) + CTZ(0x0001000000000000, 48) + CTO(0x0000ffffffffffff, 48) + CLZ(0x0000800000000000, 16) + CLO(0xffffffffffff0000, 48) + CTZ(0x0000800000000000, 47) + CTO(0x00007fffffffffff, 47) + CLZ(0x0000400000000000, 17) + CLO(0xfffffffffffe0000, 47) + CTZ(0x0000400000000000, 46) + CTO(0x00003fffffffffff, 46) + CLZ(0x0000200000000000, 18) + CLO(0xfffffffffffc0000, 46) + CTZ(0x0000200000000000, 45) + CTO(0x00001fffffffffff, 45) + CLZ(0x0000100000000000, 19) + CLO(0xfffffffffff80000, 45) + CTZ(0x0000100000000000, 44) + CTO(0x00000fffffffffff, 44) + CLZ(0x0000080000000000, 20) + CLO(0xfffffffffff00000, 44) + CTZ(0x0000080000000000, 43) + CTO(0x000007ffffffffff, 43) + CLZ(0x0000040000000000, 21) + CLO(0xffffffffffe00000, 43) + CTZ(0x0000040000000000, 42) + CTO(0x000003ffffffffff, 42) + CLZ(0x0000020000000000, 22) + CLO(0xffffffffffc00000, 42) + CTZ(0x0000020000000000, 41) + CTO(0x000001ffffffffff, 41) + 
CLZ(0x0000010000000000, 23) + CLO(0xffffffffff800000, 41) + CTZ(0x0000010000000000, 40) + CTO(0x000000ffffffffff, 40) + CLZ(0x0000008000000000, 24) + CLO(0xffffffffff000000, 40) + CTZ(0x0000008000000000, 39) + CTO(0x0000007fffffffff, 39) + CLZ(0x0000004000000000, 25) + CLO(0xfffffffffe000000, 39) + CTZ(0x0000004000000000, 38) + CTO(0x0000003fffffffff, 38) + CLZ(0x0000002000000000, 26) + CLO(0xfffffffffc000000, 38) + CTZ(0x0000002000000000, 37) + CTO(0x0000001fffffffff, 37) + CLZ(0x0000001000000000, 27) + CLO(0xfffffffff8000000, 37) + CTZ(0x0000001000000000, 36) + CTO(0x0000000fffffffff, 36) + CLZ(0x0000000800000000, 28) + CLO(0xfffffffff0000000, 36) + CTZ(0x0000000800000000, 35) + CTO(0x00000007ffffffff, 35) + CLZ(0x0000000400000000, 29) + CLO(0xffffffffe0000000, 35) + CTZ(0x0000000400000000, 34) + CTO(0x00000003ffffffff, 34) + CLZ(0x0000000200000000, 30) + CLO(0xffffffffc0000000, 34) + CTZ(0x0000000200000000, 33) + CTO(0x00000001ffffffff, 33) + CLZ(0x0000000100000000, 31) + CLO(0xffffffff80000000, 33) + CTZ(0x0000000100000000, 32) + CTO(0x00000000ffffffff, 32) + CLZ(0x0000000080000000, 32) + CLO(0xffffffff00000000, 32) + CTZ(0x0000000080000000, 31) + CTO(0x000000007fffffff, 31) + CLZ(0x0000000040000000, 33) + CLO(0xfffffffe00000000, 31) + CTZ(0x0000000040000000, 30) + CTO(0x000000003fffffff, 30) + CLZ(0x0000000020000000, 34) + CLO(0xfffffffc00000000, 30) + CTZ(0x0000000020000000, 29) + CTO(0x000000001fffffff, 29) + CLZ(0x0000000010000000, 35) + CLO(0xfffffff800000000, 29) + CTZ(0x0000000010000000, 28) + CTO(0x000000000fffffff, 28) + CLZ(0x0000000008000000, 36) + CLO(0xfffffff000000000, 28) + CTZ(0x0000000008000000, 27) + CTO(0x0000000007ffffff, 27) + CLZ(0x0000000004000000, 37) + CLO(0xffffffe000000000, 27) + CTZ(0x0000000004000000, 26) + CTO(0x0000000003ffffff, 26) + CLZ(0x0000000002000000, 38) + CLO(0xffffffc000000000, 26) + CTZ(0x0000000002000000, 25) + CTO(0x0000000001ffffff, 25) + CLZ(0x0000000001000000, 39) + CLO(0xffffff8000000000, 25) + 
CTZ(0x0000000001000000, 24) + CTO(0x0000000000ffffff, 24) + CLZ(0x0000000000800000, 40) + CLO(0xffffff0000000000, 24) + CTZ(0x0000000000800000, 23) + CTO(0x00000000007fffff, 23) + CLZ(0x0000000000400000, 41) + CLO(0xfffffe0000000000, 23) + CTZ(0x0000000000400000, 22) + CTO(0x00000000003fffff, 22) + CLZ(0x0000000000200000, 42) + CLO(0xfffffc0000000000, 22) + CTZ(0x0000000000200000, 21) + CTO(0x00000000001fffff, 21) + CLZ(0x0000000000100000, 43) + CLO(0xfffff80000000000, 21) + CTZ(0x0000000000100000, 20) + CTO(0x00000000000fffff, 20) + CLZ(0x0000000000080000, 44) + CLO(0xfffff00000000000, 20) + CTZ(0x0000000000080000, 19) + CTO(0x000000000007ffff, 19) + CLZ(0x0000000000040000, 45) + CLO(0xffffe00000000000, 19) + CTZ(0x0000000000040000, 18) + CTO(0x000000000003ffff, 18) + CLZ(0x0000000000020000, 46) + CLO(0xffffc00000000000, 18) + CTZ(0x0000000000020000, 17) + CTO(0x000000000001ffff, 17) + CLZ(0x0000000000010000, 47) + CLO(0xffff800000000000, 17) + CTZ(0x0000000000010000, 16) + CTO(0x000000000000ffff, 16) + CLZ(0x0000000000008000, 48) + CLO(0xffff000000000000, 16) + CTZ(0x0000000000008000, 15) + CTO(0x0000000000007fff, 15) + CLZ(0x0000000000004000, 49) + CLO(0xfffe000000000000, 15) + CTZ(0x0000000000004000, 14) + CTO(0x0000000000003fff, 14) + CLZ(0x0000000000002000, 50) + CLO(0xfffc000000000000, 14) + CTZ(0x0000000000002000, 13) + CTO(0x0000000000001fff, 13) + CLZ(0x0000000000001000, 51) + CLO(0xfff8000000000000, 13) + CTZ(0x0000000000001000, 12) + CTO(0x0000000000000fff, 12) + CLZ(0x0000000000000800, 52) + CLO(0xfff0000000000000, 12) + CTZ(0x0000000000000800, 11) + CTO(0x00000000000007ff, 11) + CLZ(0x0000000000000400, 53) + CLO(0xffe0000000000000, 11) + CTZ(0x0000000000000400, 10) + CTO(0x00000000000003ff, 10) + CLZ(0x0000000000000200, 54) + CLO(0xffc0000000000000, 10) + CTZ(0x0000000000000200, 9) + CTO(0x00000000000001ff, 9) + CLZ(0x0000000000000100, 55) + CLO(0xff80000000000000, 9) + CTZ(0x0000000000000100, 8) + CTO(0x00000000000000ff, 8) + CLZ(0x0000000000000080, 
56) + CLO(0xff00000000000000, 8) + CTZ(0x0000000000000080, 7) + CTO(0x000000000000007f, 7) + CLZ(0x0000000000000040, 57) + CLO(0xfe00000000000000, 7) + CTZ(0x0000000000000040, 6) + CTO(0x000000000000003f, 6) + CLZ(0x0000000000000020, 58) + CLO(0xfc00000000000000, 6) + CTZ(0x0000000000000020, 5) + CTO(0x000000000000001f, 5) + CLZ(0x0000000000000010, 59) + CLO(0xf800000000000000, 5) + CTZ(0x0000000000000010, 4) + CTO(0x000000000000000f, 4) + CLZ(0x0000000000000008, 60) + CLO(0xf000000000000000, 4) + CTZ(0x0000000000000008, 3) + CTO(0x0000000000000007, 3) + CLZ(0x0000000000000004, 61) + CLO(0xe000000000000000, 3) + CTZ(0x0000000000000004, 2) + CTO(0x0000000000000003, 2) + CLZ(0x0000000000000002, 62) + CLO(0xc000000000000000, 2) + CTZ(0x0000000000000002, 1) + CTO(0x0000000000000001, 1) + CLZ(0x0000000000000001, 63) + CLO(0x8000000000000000, 1) + CTZ(0x0000000000000001, 0) + CTO(0x0000000000000000, 0) + CLZ(0x0000000000000000, 64) + CLO(0x0000000000000000, 0) + CTZ(0x0000000000000000, 64) + CTO(0xffffffffffffffff, 64) +#endif + prepare + pushargi ok + finishi @printf + reti 0 + epilog +#endif diff --git a/deps/lightning/check/call.tst b/deps/lightning/check/call.tst index 21068b6e..40fb0415 100644 --- a/deps/lightning/check/call.tst +++ b/deps/lightning/check/call.tst @@ -1,10 +1,10 @@ -#define def_wi(i) \ +#define def_wi(i, ii) \ name _w##i \ _w##i: \ prolog \ - arg $arg##i \ + arg##ii $arg##i \ getarg##i %r0 $arg##i \ - retr %r0 \ + retr##i %r0 \ epilog #define def_wf(f) \ name _w##f \ @@ -15,11 +15,11 @@ _w##f: \ truncr##f %r0 %f0 \ retr %r0 \ epilog -#define def_fi(f, i) \ +#define def_fi(f, i, ii) \ name f##i \ f##i: \ prolog \ - arg $arg##i \ + arg##ii $arg##i \ getarg##i %r0 $arg##i \ extr##f %f0 %r0 \ retr##f %f0 \ @@ -52,33 +52,33 @@ bstr: .code jmpi main - def_wi(_c) - def_wi(_uc) - def_wi(_s) - def_wi(_us) + def_wi(_c, _c) + def_wi(_uc, _c) + def_wi(_s, _s) + def_wi(_us, _s) #if __WORDSIZE == 64 - def_wi(_i) - def_wi(_ui) + def_wi(_i, _i) + def_wi(_ui, _i) 
#endif def_wf(_f) def_wf(_d) - def_fi(_f, _c) - def_fi(_f, _uc) - def_fi(_f, _s) - def_fi(_f, _us) - def_fi(_f, _i) + def_fi(_f, _c, _c) + def_fi(_f, _uc, _c) + def_fi(_f, _s, _s) + def_fi(_f, _us, _s) + def_fi(_f, _i, _i) #if __WORDSIZE == 64 - def_fi(_f, _ui) - def_fi(_f, _l) + def_fi(_f, _ui, _i) + def_fi(_f, _l, _l) #endif - def_fi(_d, _c) - def_fi(_d, _uc) - def_fi(_d, _s) - def_fi(_d, _us) - def_fi(_d, _i) + def_fi(_d, _c, _c) + def_fi(_d, _uc, _c) + def_fi(_d, _s, _s) + def_fi(_d, _us, _s) + def_fi(_d, _i, _i) #if __WORDSIZE == 64 - def_fi(_d, _ui) - def_fi(_d, _l) + def_fi(_d, _ui, _i) + def_fi(_d, _l, _l) #endif def_f(_f) def_f(_d) @@ -91,7 +91,7 @@ main: #define _call_w(n, i, a, r) \ prepare \ - pushargi a \ + pushargi##i a \ finishi _w##i \ retval %r0 \ extr##i %r0 %r0 \ @@ -111,7 +111,7 @@ _w##f##_##n: #define call_wf(n, f, a, r) _call_wf(n, f, a, r) #define _call_fi(n, f, i, a, r) \ prepare \ - pushargi a \ + pushargi##i a \ finishi f##i \ retval##f %f0 \ beqi##f f##i##n %f0 r \ @@ -196,6 +196,7 @@ f##g##n: call_wf(__LINE__, _d, c7f, f7f) call_wf(__LINE__, _d, wc80, f80) call_wf(__LINE__, _d, wc81, f81) + call_fi(__LINE__, _f, _c, c7f, f7f) call_fi(__LINE__, _f, _c, c80, f80) call_fi(__LINE__, _f, _uc, c7f, f7f) diff --git a/deps/lightning/check/carg.c b/deps/lightning/check/carg.c index 35b897ed..6992db47 100644 --- a/deps/lightning/check/carg.c +++ b/deps/lightning/check/carg.c @@ -58,8 +58,8 @@ int main(int argc, char *argv[]) { void (*code)(void); - jit_node_t *jmp, *pass; - jit_node_t *jw, *jf, *jd; + jit_node_t *jmp, *pass, *fail; + jit_node_t *jw, *jf, *jd; jit_int32_t s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16; jit_node_t *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8, @@ -172,10 +172,11 @@ main(int argc, char *argv[]) LOAD_ARG(16); #undef LOAD_ARG pass = jit_forward(); + fail = jit_forward(); #define CHECK_ARG(N) \ do { \ jit_getarg(JIT_R0, a##N); \ - jit_patch_at(jit_beqi(JIT_R0, 17 - N), pass); \ + 
jit_patch_at(jit_bnei(JIT_R0, 17 - N), fail); \ } while (0) CHECK_ARG(1); CHECK_ARG(2); @@ -194,6 +195,8 @@ main(int argc, char *argv[]) CHECK_ARG(15); CHECK_ARG(16); #undef CHECK_ARG + jit_patch_at(jit_jmpi(), pass); + jit_link(fail); jit_calli(abort); jit_link(pass); jit_ret(); @@ -300,10 +303,11 @@ main(int argc, char *argv[]) LOAD_ARG(16); #undef LOAD_ARG pass = jit_forward(); + fail = jit_forward(); #define CHECK_ARG(N) \ do { \ jit_getarg_f(JIT_F0, a##N); \ - jit_patch_at(jit_beqi_f(JIT_F0, 17 - N), pass); \ + jit_patch_at(jit_bnei_f(JIT_F0, 17 - N), fail); \ } while (0) CHECK_ARG(1); CHECK_ARG(2); @@ -322,6 +326,8 @@ main(int argc, char *argv[]) CHECK_ARG(15); CHECK_ARG(16); #undef CHECK_ARG + jit_patch_at(jit_jmpi(), pass); + jit_link(fail); jit_calli(abort); jit_link(pass); jit_ret(); @@ -428,10 +434,11 @@ main(int argc, char *argv[]) LOAD_ARG(16); #undef LOAD_ARG pass = jit_forward(); + fail = jit_forward(); #define CHECK_ARG(N) \ do { \ jit_getarg_d(JIT_F0, a##N); \ - jit_patch_at(jit_beqi_d(JIT_F0, 17 - N), pass); \ + jit_patch_at(jit_bnei_d(JIT_F0, 17 - N), fail); \ } while (0) CHECK_ARG(1); CHECK_ARG(2); @@ -450,6 +457,8 @@ main(int argc, char *argv[]) CHECK_ARG(15); CHECK_ARG(16); #undef CHECK_ARG + jit_patch_at(jit_jmpi(), pass); + jit_link(fail); jit_calli(abort); jit_link(pass); jit_ret(); @@ -484,6 +493,7 @@ main(int argc, char *argv[]) jit_pushargi(1); } jit_patch_at(jit_finishi(NULL), jw); + jit_prepare(); { jit_pushargi_f(16); @@ -504,6 +514,7 @@ main(int argc, char *argv[]) jit_pushargi_f(1); } jit_patch_at(jit_finishi(NULL), jf); + jit_prepare(); { jit_pushargi_d(16); diff --git a/deps/lightning/check/catomic.c b/deps/lightning/check/catomic.c index ef09076c..e1e2ea76 100644 --- a/deps/lightning/check/catomic.c +++ b/deps/lightning/check/catomic.c @@ -150,7 +150,10 @@ main(int argc, char *argv[]) #define join(tid) \ /* load pthread_t value in JIT_R0 */ \ jit_movi(JIT_R0, (jit_word_t)tids); \ - jit_ldxi(JIT_R0, JIT_R0, tid * 
sizeof(pthread_t)); \ + if (__WORDSIZE == 64 && sizeof(pthread_t) == 4) \ + jit_ldxi_i(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \ + else \ + jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t)); \ jit_prepare(); \ jit_pushargr(JIT_R0); \ jit_pushargi((jit_word_t)NULL); \ diff --git a/deps/lightning/check/ccall.c b/deps/lightning/check/ccall.c index 9dae2569..3491f2e9 100644 --- a/deps/lightning/check/ccall.c +++ b/deps/lightning/check/ccall.c @@ -132,6 +132,16 @@ # define _l15 _w15 #endif +#ifndef jit_arg_uc +# define jit_arg_uc jit_arg_c +#endif +#ifndef jit_arg_us +# define jit_arg_us jit_arg_s +#endif +#ifndef jit_arg_ui +# define jit_arg_ui jit_arg_i +#endif + /* * Types */ @@ -624,7 +634,7 @@ main(int argc, char *argv[]) #define arg15(T) arg14(T) a15 = jit_arg##T(); #define get0(B,T,R) jit_movi##B(R##0,0); -#define get1(B,T,R) jit_getarg##B(R##0,a##1); +#define get1(B,T,R) jit_getarg##T(R##0,a##1); #define get2(B,T,R) \ get1(B,T,R); \ jit_movr##B(R##1, R##0); \ @@ -707,7 +717,7 @@ main(int argc, char *argv[]) n##T##N = jit_name(strfy(n##T##N)); \ jit_note("ccall.c", __LINE__); \ jit_prolog(); \ - arg##N(); \ + arg##N(T); \ get##N(,T,JIT_R) \ jit_extr##T(JIT_R0, JIT_R0); \ jit_retr(JIT_R0); \ @@ -777,7 +787,7 @@ main(int argc, char *argv[]) #define calin(T,N) \ jit_prepare(); \ - push##N() \ + push##N(T) \ jit_finishi(C##T##N); \ jit_retval##T(JIT_R0); \ jmp = jit_beqi(JIT_R0, T##N); \ @@ -826,7 +836,7 @@ main(int argc, char *argv[]) #undef calfn #define calin(T,N) \ jit_prepare(); \ - push##N() \ + push##N(T) \ jit_finishi(CJ##T##N); \ jit_retval##T(JIT_R0); \ jmp = jit_beqi(JIT_R0, T##N); \ diff --git a/deps/lightning/check/factorial.tst b/deps/lightning/check/factorial.tst new file mode 100644 index 00000000..68adbb2c --- /dev/null +++ b/deps/lightning/check/factorial.tst @@ -0,0 +1,73 @@ +.data 32 +str: +.c "%.0lf\n" +.code + jmpi main +/* + * double factorial(unsigned long n) { + * double r = 1; + * while (n > 1) { + * r *= n; + * --n; + * } + * return r; + 
* } + */ +factorial: + prolog + arg $n + getarg %r0 $n + movi_d %f0 1.0 + extr_d %f1 %r0 + movr_d %f2 %f0 +loop: + bltr_d done %f1 %f2 + mulr_d %f0 %f0 %f1 + subr_d %f1 %f1 %f2 + jmpi loop +done: + retr_d %f0 + epilog + +/* + * int main(int argc, char *argv[]) { + * unsigned long v; + * double d; + * if (argc == 2) + * v = strtoul(argv[1], NULL, 0); + * else + * v = 32; + * d = factorial(v); + * printf("%.0lf\n", d); + * return 0; + * } + */ +main: + prolog + arg $argc + arg $argv + getarg %r0 $argc + bnei default %r0 2 + getarg %v0 $argv + ldxi %r0 %v0 $(__WORDSIZE >> 3) + prepare + pushargr %r0 + pushargi 0 + pushargi 0 + finishi @strtoul + retval %v0 + jmpi call +default: + movi %v0 32 +call: + prepare + pushargr %v0 + finishi factorial + retval_d %f0 + prepare + pushargi str + ellipsis + pushargr_d %f0 + finishi @printf + reti 0 + epilog diff --git a/deps/lightning/check/fib.tst b/deps/lightning/check/fib.tst index 0835323c..926ee819 100644 --- a/deps/lightning/check/fib.tst +++ b/deps/lightning/check/fib.tst @@ -32,7 +32,7 @@ main: arg $argc arg $argv - getarg_i %r0 $argc + getarg %r0 $argc blei default %r0 1 getarg %r0 $argv addi %r0 %r0 $(__WORDSIZE >> 3) diff --git a/deps/lightning/check/float.tst b/deps/lightning/check/float.tst index 05a0889a..69a6cafa 100644 --- a/deps/lightning/check/float.tst +++ b/deps/lightning/check/float.tst @@ -14,9 +14,9 @@ ok: # define x80 0x8000000000000000 #endif -#if __mips__ || __sparc__ || __hppa__ || __riscv +#if (__mips__ && __mips_isa_rev < 6) || __sparc__ || __hppa__ || __riscv # define wnan x7f -#elif __arm__ || __aarch64__ || __alpha__ || __loongarch__ +#elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || __aarch64__ || __alpha__ || __loongarch__ # define wnan 0 #else # define wnan x80 diff --git a/deps/lightning/check/lightning.c b/deps/lightning/check/lightning.c index 4f3b052b..80ea0816 100644 --- a/deps/lightning/check/lightning.c +++ b/deps/lightning/check/lightning.c @@ -270,10 +270,16 @@ static jit_pointer_t 
get_arg(void); static jit_word_t get_imm(void); static void live(void); static void align(void); static void name(void); +static void skip(void); static void prolog(void); static void frame(void); static void tramp(void); static void ellipsis(void); static void allocai(void); static void allocar(void); +static void arg_c(void); static void arg_s(void); +static void arg_i(void); +#if __WORDSIZE == 64 +static void arg_l(void); +#endif static void arg(void); static void getarg_c(void); static void getarg_uc(void); static void getarg_s(void); static void getarg_us(void); @@ -282,6 +288,15 @@ static void getarg_i(void); static void getarg_ui(void); static void getarg_l(void); #endif static void getarg(void); +static void putargr_c(void); static void putargi_c(void); +static void putargr_uc(void); static void putargi_uc(void); +static void putargr_s(void); static void putargi_s(void); +static void putargr_us(void); static void putargi_us(void); +static void putargr_i(void); static void putargi_i(void); +#if __WORDSIZE == 64 +static void putargr_ui(void); static void putargi_ui(void); +static void putargr_l(void); static void putargi_l(void); +#endif static void putargr(void); static void putargi(void); static void addr(void); static void addi(void); static void addxr(void); static void addxi(void); @@ -306,6 +321,8 @@ static void lshr(void); static void lshi(void); static void rshr(void); static void rshi(void); static void rshr_u(void); static void rshi_u(void); static void negr(void); static void comr(void); +static void clor(void); static void clzr(void); +static void ctor(void); static void ctzr(void); static void ltr(void); static void lti(void); static void ltr_u(void); static void lti_u(void); static void ler(void); static void lei(void); @@ -392,9 +409,30 @@ static void bxsubr_u(void); static void bxsubi_u(void); static void jmpr(void); static void jmpi(void); static void callr(void); static void calli(void); static void prepare(void); + +static void 
pushargr_c(void); static void pushargi_c(void); +static void pushargr_uc(void); static void pushargi_uc(void); +static void pushargr_s(void); static void pushargi_s(void); +static void pushargr_us(void); static void pushargi_us(void); +static void pushargr_i(void); static void pushargi_i(void); +#if __WORDSIZE == 64 +static void pushargr_ui(void); static void pushargi_ui(void); +static void pushargr_l(void); static void pushargi_l(void); +#endif static void pushargr(void); static void pushargi(void); + static void finishr(void); static void finishi(void); static void ret(void); + +static void retr_c(void); static void reti_c(void); +static void retr_uc(void); static void reti_uc(void); +static void retr_s(void); static void reti_s(void); +static void retr_us(void); static void reti_us(void); +static void retr_i(void); static void reti_i(void); +#if __WORDSIZE == 64 +static void retr_ui(void); static void reti_ui(void); +static void retr_l(void); static void reti_l(void); +#endif static void retr(void); static void reti(void); static void retval_c(void); static void retval_uc(void); static void retval_s(void); static void retval_us(void); @@ -591,10 +629,16 @@ static instr_t instr_vector[] = { #define entry2(name, function) { NULL, name, function } entry(live), entry(align), entry(name), + entry(skip), entry(prolog), entry(frame), entry(tramp), entry(ellipsis), entry(allocai), entry(allocar), + entry(arg_c), entry(arg_s), + entry(arg_i), +#if __WORDSIZE == 64 + entry(arg_l), +#endif entry(arg), entry(getarg_c), entry(getarg_uc), entry(getarg_s), entry(getarg_us), @@ -603,6 +647,16 @@ static instr_t instr_vector[] = { entry(getarg_ui), entry(getarg_l), #endif entry(getarg), + + entry(putargr_c), entry(putargi_c), + entry(putargr_uc), entry(putargi_uc), + entry(putargr_s), entry(putargi_s), + entry(putargr_us), entry(putargi_us), + entry(putargr_i), entry(putargi_i), +#if __WORDSIZE == 64 + entry(putargr_ui), entry(putargi_ui), + entry(putargr_l), entry(putargi_l), 
+#endif entry(putargr), entry(putargi), entry(addr), entry(addi), entry(addxr), entry(addxi), @@ -627,6 +681,8 @@ static instr_t instr_vector[] = { entry(rshr), entry(rshi), entry(rshr_u), entry(rshi_u), entry(negr), entry(comr), + entry(clor), entry(clzr), + entry(ctor), entry(ctzr), entry(ltr), entry(lti), entry(ltr_u), entry(lti_u), entry(ler), entry(lei), @@ -713,9 +769,27 @@ static instr_t instr_vector[] = { entry(jmpr), entry(jmpi), entry(callr), entry(calli), entry(prepare), + entry(pushargr_c), entry(pushargi_c), + entry(pushargr_uc), entry(pushargi_uc), + entry(pushargr_s), entry(pushargi_s), + entry(pushargr_us), entry(pushargi_us), + entry(pushargr_i), entry(pushargi_i), +#if __WORDSIZE == 64 + entry(pushargr_ui), entry(pushargi_ui), + entry(pushargr_l), entry(pushargi_l), +#endif entry(pushargr), entry(pushargi), entry(finishr), entry(finishi), entry(ret), + entry(retr_c), entry(reti_c), + entry(retr_uc), entry(reti_uc), + entry(retr_s), entry(reti_s), + entry(retr_us), entry(reti_us), + entry(retr_i), entry(reti_i), +#if __WORDSIZE == 64 + entry(retr_ui), entry(reti_ui), + entry(retr_l), entry(reti_l), +#endif entry(retr), entry(reti), entry(retval_c), entry(retval_uc), entry(retval_s), entry(retval_us), @@ -1400,6 +1474,7 @@ live(void) { jit_live(parser.regval); } entry_im(align) +entry_im(skip) entry(prolog) entry_im(frame) entry_im(tramp) entry(ellipsis) @@ -1413,6 +1488,11 @@ allocai(void) { symbol->value.i = i; } entry_ir_ir(allocar) +entry_ca(arg_c) entry_ca(arg_s) +entry_ca(arg_i) +#if __WORDSIZE == 64 +entry_ca(arg_l) +#endif entry_ca(arg) entry_ia(getarg_c) entry_ia(getarg_uc) entry_ia(getarg_s) entry_ia(getarg_us) @@ -1421,6 +1501,15 @@ entry_ia(getarg_i) entry_ia(getarg_ui) entry_ia(getarg_l) #endif entry_ia(getarg) +entry_ia(putargr_c) entry_ima(putargi_c) +entry_ia(putargr_uc) entry_ima(putargi_uc) +entry_ia(putargr_s) entry_ima(putargi_s) +entry_ia(putargr_us) entry_ima(putargi_us) +entry_ia(putargr_i) entry_ima(putargi_i) +#if __WORDSIZE 
== 64 +entry_ia(putargr_ui) entry_ima(putargi_ui) +entry_ia(putargr_l) entry_ima(putargi_l) +#endif entry_ia(putargr) entry_ima(putargi) entry_ir_ir_ir(addr) entry_ir_ir_im(addi) entry_ir_ir_ir(addxr) entry_ir_ir_im(addxi) @@ -1445,6 +1534,8 @@ entry_ir_ir_ir(lshr) entry_ir_ir_im(lshi) entry_ir_ir_ir(rshr) entry_ir_ir_im(rshi) entry_ir_ir_ir(rshr_u) entry_ir_ir_im(rshi_u) entry_ir_ir(negr) entry_ir_ir(comr) +entry_ir_ir(clor) entry_ir_ir(clzr) +entry_ir_ir(ctor) entry_ir_ir(ctzr) entry_ir_ir_ir(ltr) entry_ir_ir_im(lti) entry_ir_ir_ir(ltr_u) entry_ir_ir_im(lti_u) entry_ir_ir_ir(ler) entry_ir_ir_im(lei) @@ -1578,9 +1669,27 @@ entry_lb_ir_ir(bxsubr_u) entry_lb_ir_im(bxsubi_u) entry_ir(jmpr) entry_lb(jmpi) entry_ir(callr) entry_fn(calli) entry(prepare) +entry_ir(pushargr_c) entry_im(pushargi_c) +entry_ir(pushargr_uc) entry_im(pushargi_uc) +entry_ir(pushargr_s) entry_im(pushargi_s) +entry_ir(pushargr_us) entry_im(pushargi_us) +entry_ir(pushargr_i) entry_im(pushargi_i) +#if __WORDSIZE == 64 +entry_ir(pushargr_ui) entry_im(pushargi_ui) +entry_ir(pushargr_l) entry_im(pushargi_l) +#endif entry_ir(pushargr) entry_im(pushargi) entry_ir(finishr) entry_fn(finishi) entry(ret) +entry_ir(retr_c) entry_im(reti_c) +entry_ir(retr_uc) entry_im(reti_uc) +entry_ir(retr_s) entry_im(reti_s) +entry_ir(retr_us) entry_im(reti_us) +entry_ir(retr_i) entry_im(reti_i) +#if __WORDSIZE == 64 +entry_ir(retr_ui) entry_im(reti_ui) +entry_ir(retr_l) entry_im(reti_l) +#endif entry_ir(retr) entry_im(reti) entry_ir(retval_c) entry_ir(retval_uc) entry_ir(retval_s) entry_ir(retval_us) @@ -4257,6 +4366,9 @@ main(int argc, char *argv[]) opt_short += snprintf(cmdline + opt_short, sizeof(cmdline) - opt_short, " -D__mips__=1"); + opt_short += snprintf(cmdline + opt_short, + sizeof(cmdline) - opt_short, + " -D__mips_isa_rev=%d", jit_cpu.release); #endif #if defined(__arm__) opt_short += snprintf(cmdline + opt_short, diff --git a/deps/lightning/check/protect.c b/deps/lightning/check/protect.c new file mode 100644 
index 00000000..f5ec7406 --- /dev/null +++ b/deps/lightning/check/protect.c @@ -0,0 +1,59 @@ +/* + * Simple test of (un)protecting a code buffer. + */ + +#include +#include +#include + +#define MARKER 10 + +int +main(int argc, char *argv[]) +{ + jit_state_t *_jit; + jit_node_t *load, *label, *ok; + unsigned char *ptr; + void (*function)(void); + int mmap_prot, mmap_flags; + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog(); + + load = jit_movi(JIT_R0, 0); + jit_ldr_c(JIT_R0, JIT_R0); + ok = jit_forward(); + jit_patch_at(jit_beqi(JIT_R0, MARKER), ok); + jit_prepare(); + jit_pushargi(1); + jit_finishi(exit); + label = jit_indirect(); + jit_skip(1); /* Reserves enough space for a byte. */ + jit_patch_at(load, label); + jit_link(ok); + jit_prepare(); + jit_pushargi((jit_word_t)"%s\n"); + jit_ellipsis(); + jit_pushargi((jit_word_t)"ok"); + jit_finishi(printf); + + function = jit_emit(); + if (function == NULL) + abort(); + + jit_unprotect (); + ptr = jit_address (label); + *ptr = MARKER; + jit_protect (); + + jit_clear_state(); + + (*function)(); + + jit_destroy_state(); + finish_jit(); + + return (0); +} diff --git a/deps/lightning/check/put.tst b/deps/lightning/check/put.tst index a7e39e1c..65f1580a 100644 --- a/deps/lightning/check/put.tst +++ b/deps/lightning/check/put.tst @@ -9,49 +9,49 @@ ok: putr: prolog frame 160 - arg $ac - arg $auc - arg $as - arg $aus - arg $ai + arg_c $ac + arg_c $auc + arg_s $as + arg_s $aus + arg_i $ai #if __WORDSIZE == 64 - arg $aui - arg $al + arg_i $aui + arg_l $al #endif arg_f $af arg_d $ad arg $a #if __WORDSIZE == 64 - arg $_l - arg $_ui + arg_l $_l + arg_i $_ui #endif - arg $_i - arg $_us - arg $_s - arg $_uc - arg $_c + arg_i $_i + arg_s $_us + arg_s $_s + arg_c $_uc + arg_c $_c getarg_c %r0 $ac negr %r0 %r0 - putargr %r0 $ac + putargr_c %r0 $ac getarg_uc %r0 $auc negr %r0 %r0 - putargr %r0 $auc + putargr_uc %r0 $auc getarg_s %r0 $as negr %r0 %r0 - putargr %r0 $as + putargr_s %r0 $as getarg_us %r0 $aus negr %r0 %r0 - 
putargr %r0 $aus + putargr_us %r0 $aus getarg_i %r0 $ai negr %r0 %r0 - putargr %r0 $ai + putargr_i %r0 $ai #if __WORDSIZE == 64 getarg_ui %r0 $aui negr %r0 %r0 - putargr %r0 $aui + putargr_ui %r0 $aui getarg_l %r0 $al negr %r0 %r0 - putargr %r0 $al + putargr_l %r0 $al #endif getarg_f %f0 $af negr_f %f0 %f0 @@ -65,49 +65,49 @@ putr: #if __WORDSIZE == 64 getarg_l %r0 $_l negr %r0 %r0 - putargr %r0 $_l + putargr_l %r0 $_l getarg_ui %r0 $_ui negr %r0 %r0 - putargr %r0 $_ui + putargr_ui %r0 $_ui #endif getarg_i %r0 $_i negr %r0 %r0 - putargr %r0 $_i + putargr_i %r0 $_i getarg_us %r0 $_us negr %r0 %r0 - putargr %r0 $_us + putargr_us %r0 $_us getarg_s %r0 $_s negr %r0 %r0 - putargr %r0 $_s + putargr_s %r0 $_s getarg_uc %r0 $_uc negr %r0 %r0 - putargr %r0 $_uc + putargr_uc %r0 $_uc getarg_c %r0 $_c negr %r0 %r0 - putargr %r0 $_c + putargr_c %r0 $_c jmpi _putr rputr: - putargi 17 $ac - putargi 16 $auc - putargi 15 $as - putargi 14 $aus - putargi 13 $ai + putargi_c 17 $ac + putargi_uc 16 $auc + putargi_s 15 $as + putargi_us 14 $aus + putargi_i 13 $ai #if __WORDSIZE == 64 - putargi 12 $aui - putargi 11 $al + putargi_ui 12 $aui + putargi_l 11 $al #endif putargi_f 10 $af putargi_d 9 $ad putargi 8 $a #if __WORDSIZE == 64 - putargi 7 $_l - putargi 6 $_ui + putargi_l 7 $_l + putargi_ui 6 $_ui #endif - putargi 5 $_i - putargi 4 $_us - putargi 3 $_s - putargi 2 $_uc - putargi 1 $_c + putargi_i 5 $_i + putargi_us 4 $_us + putargi_s 3 $_s + putargi_uc 2 $_uc + putargi_c 1 $_c jmpi _puti rputi: ret @@ -117,27 +117,27 @@ rputi: _putr: prolog tramp 160 - arg $ac - arg $auc - arg $as - arg $aus - arg $ai + arg_c $ac + arg_c $auc + arg_s $as + arg_s $aus + arg_i $ai #if __WORDSIZE == 64 - arg $aui - arg $al + arg_i $aui + arg_l $al #endif arg_f $af arg_d $ad arg $a #if __WORDSIZE == 64 - arg $_l - arg $_ui + arg_l $_l + arg_i $_ui #endif - arg $_i - arg $_us - arg $_s - arg $_uc - arg $_c + arg_i $_i + arg_s $_us + arg_s $_s + arg_c $_uc + arg_c $_c getarg_c %r0 $ac beqi rac %r0 -1 calli 
@abort @@ -181,7 +181,7 @@ rad: calli @abort ra: #if __WORDSIZE == 64 - getarg %r0 $_l + getarg_l %r0 $_l beqi r_l %r0 -11 calli @abort r_l: @@ -217,27 +217,27 @@ r_c: _puti: prolog tramp 160 - arg $ac - arg $auc - arg $as - arg $aus - arg $ai + arg_c $ac + arg_c $auc + arg_s $as + arg_s $aus + arg_i $ai #if __WORDSIZE == 64 - arg $aui - arg $al + arg_i $aui + arg_l $al #endif arg_f $af arg_d $ad arg $a #if __WORDSIZE == 64 - arg $_l - arg $_ui + arg_l $_l + arg_i $_ui #endif - arg $_i - arg $_us - arg $_s - arg $_uc - arg $_c + arg_i $_i + arg_s $_us + arg_s $_s + arg_c $_uc + arg_c $_c getarg_c %r0 $ac beqi iac %r0 17 calli @abort @@ -281,7 +281,7 @@ iad: calli @abort ia: #if __WORDSIZE == 64 - getarg %r0 $_l + getarg_l %r0 $_l beqi i_l %r0 7 calli @abort i_l: @@ -390,27 +390,27 @@ fd2: main: prolog prepare - pushargi 1 - pushargi 2 - pushargi 3 - pushargi 4 - pushargi 5 + pushargi_c 1 + pushargi_uc 2 + pushargi_s 3 + pushargi_us 4 + pushargi_i 5 #if __WORDSIZE == 64 - pushargi 6 - pushargi 7 + pushargi_ui 6 + pushargi_l 7 #endif - pushargi_f 8 - pushargi_d 9 - pushargi 10 + pushargi_f 8 + pushargi_d 9 + pushargi 10 #if __WORDSIZE == 64 - pushargi 11 - pushargi 12 + pushargi_l 11 + pushargi_ui 12 #endif - pushargi 13 - pushargi 14 - pushargi 15 - pushargi 16 - pushargi 17 + pushargi_i 13 + pushargi_us 14 + pushargi_s 15 + pushargi_uc 16 + pushargi_c 17 finishi putr prepare pushargi 1 diff --git a/deps/lightning/check/riprel.c b/deps/lightning/check/riprel.c new file mode 100644 index 00000000..c776e2a9 --- /dev/null +++ b/deps/lightning/check/riprel.c @@ -0,0 +1,173 @@ +/* + * Simple test for x86_64 rip relative access that can also be useful + * on other ports when data is close to instruction pointer. 
+ */ + +#include +#include +#include +#include +#if defined(__sgi) +# include +#endif +#include + +#ifndef MAP_ANON +# define MAP_ANON MAP_ANONYMOUS +# ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS 0 +# endif +#endif + +#if !defined(__sgi) +#define mmap_fd -1 +#endif + +int +main(int argc, char *argv[]) +{ + jit_uint8_t *ptr; + jit_state_t *_jit; + jit_word_t length; +#if defined(__sgi) + int mmap_fd; +#endif + void (*function)(void); + int mmap_prot, mmap_flags, result, pagesize; + int mult; + +#if defined(__ia64__) + mult = 8; +#else + mult = 2; +#endif + pagesize = sysconf(_SC_PAGESIZE); + if (pagesize < 4096) + pagesize = 4096; + +#if defined(__sgi) + mmap_fd = open("/dev/zero", O_RDWR); +#endif + + mmap_prot = PROT_READ | PROT_WRITE; +#if !(__OpenBSD__ || __APPLE__) + mmap_prot |= PROT_EXEC; +#endif +#if __NetBSD__ + mmap_prot = PROT_MPROTECT(mmap_prot); + mmap_flags = 0; +#else + mmap_flags = MAP_PRIVATE; +#endif + mmap_flags |= MAP_ANON; + ptr = mmap(NULL, pagesize * mult, mmap_prot, mmap_flags, mmap_fd, 0); + assert(ptr != MAP_FAILED); +#if defined(__sgi) + close(mmap_fd); +#endif + + init_jit(argv[0]); + _jit = jit_new_state(); + + jit_prolog(); + jit_movi(JIT_R0, 'c'); + jit_sti_c(ptr + 0, JIT_R0); + jit_movi(JIT_R0, 'C'); + jit_sti_c(ptr + 1, JIT_R0); + + jit_movi(JIT_R0, 's'); + jit_sti_s(ptr + 2, JIT_R0); + jit_movi(JIT_R0, 'S'); + jit_sti_s(ptr + 4, JIT_R0); + + jit_movi(JIT_R0, 'i'); + jit_sti_i(ptr + 8, JIT_R0); +#if __WORDSIZE == 64 + jit_movi(JIT_R0, 'I'); + jit_sti_i(ptr + 12, JIT_R0); + + jit_movi(JIT_R0, 'l'); + jit_sti_l(ptr + 16, JIT_R0); +#endif + jit_movi_f(JIT_F0, 1.0); + jit_sti_f(ptr + 24, JIT_F0); + jit_movi_d(JIT_F0, 2.0); + jit_sti_d(ptr + 32, JIT_F0); + + jit_ldi_c(JIT_R0, ptr + 0); + jit_ldi_s(JIT_R1, ptr + 2); + jit_ldi_i(JIT_R2, ptr + 8); +#if __WORDSIZE == 64 + jit_ldi_l(JIT_V0, ptr + 16); +#endif + jit_prepare(); +#if __WORDSIZE == 64 + jit_pushargi((jit_word_t)"%c %c %c %c\n"); +#else + jit_pushargi((jit_word_t)"%c %c %c l\n"); 
+#endif + jit_ellipsis(); + jit_pushargr(JIT_R0); + jit_pushargr(JIT_R1); + jit_pushargr(JIT_R2); +#if __WORDSIZE == 64 + jit_pushargr(JIT_V0); +#endif + jit_finishi(printf); + + jit_ldi_uc(JIT_R0, ptr + 1); + jit_ldi_us(JIT_R1, ptr + 4); +#if __WORDSIZE == 64 + jit_ldi_ui(JIT_R2, ptr + 12); +#endif + jit_prepare(); +#if __WORDSIZE == 64 + jit_pushargi((jit_word_t)"%c %c %c\n"); +#else + jit_pushargi((jit_word_t)"%c %c I\n"); +#endif + jit_ellipsis(); + jit_pushargr(JIT_R0); + jit_pushargr(JIT_R1); +#if __WORDSIZE == 64 + jit_pushargr(JIT_R2); +#endif + jit_finishi(printf); + + jit_ldi_f(JIT_F0, ptr + 24); + jit_extr_f_d(JIT_F0, JIT_F0); + jit_ldi_d(JIT_F1, ptr + 32); + + jit_prepare(); + jit_pushargi((jit_word_t)"%.1f %.1f\n"); + jit_ellipsis(); + jit_pushargr_d(JIT_F0); + jit_pushargr_d(JIT_F1); + jit_finishi(printf); + + jit_realize(); + + jit_set_code(ptr + pagesize, pagesize * (mult - 1)); + + #if __NetBSD__ || __OpenBSD__ || __APPLE__ + result = mprotect(ptr, pagesize, PROT_READ | PROT_WRITE); + assert(result == 0); +#endif + function = jit_emit(); + if (function == NULL) + abort(); + + //jit_disassemble(); + jit_clear_state(); +#if __NetBSD__ || __OpenBSD__ || __APPLE__ + result = mprotect(ptr + pagesize, pagesize, PROT_READ | PROT_EXEC); + assert(result == 0); +#endif + (*function)(); + jit_destroy_state(); + finish_jit(); + + munmap(ptr, pagesize * mult); + + return (0); +} diff --git a/deps/lightning/check/riprel.ok b/deps/lightning/check/riprel.ok new file mode 100644 index 00000000..4b908370 --- /dev/null +++ b/deps/lightning/check/riprel.ok @@ -0,0 +1,3 @@ +c s i l +C S I +1.0 2.0 diff --git a/deps/lightning/check/setcode.c b/deps/lightning/check/setcode.c index 62719eef..08611d96 100644 --- a/deps/lightning/check/setcode.c +++ b/deps/lightning/check/setcode.c @@ -31,14 +31,14 @@ main(int argc, char *argv[]) int mmap_fd; #endif void (*function)(void); - int mmap_prot, mmap_flags; + int mmap_prot, mmap_flags, result; #if defined(__sgi) mmap_fd = 
open("/dev/zero", O_RDWR); #endif mmap_prot = PROT_READ | PROT_WRITE; -#if !__OpenBSD__ +#if !(__OpenBSD__ || __APPLE__) mmap_prot |= PROT_EXEC; #endif #if __NetBSD__ @@ -83,7 +83,8 @@ main(int argc, char *argv[]) abort(); #if __NetBSD__ - assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE) == 0); + result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE); + assert(result == 0); #endif /* and calling again with enough space works */ jit_set_code(ptr, 1024 * 1024); @@ -92,8 +93,9 @@ main(int argc, char *argv[]) abort(); jit_clear_state(); -#if __NetBSD__ || __OpenBSD__ - assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC) == 0); +#if __NetBSD__ || __OpenBSD__ || __APPLE__ + result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC); + assert(result == 0); #endif (*function)(); jit_destroy_state(); diff --git a/deps/lightning/check/skip.ok b/deps/lightning/check/skip.ok new file mode 100644 index 00000000..f599e28b --- /dev/null +++ b/deps/lightning/check/skip.ok @@ -0,0 +1 @@ +10 diff --git a/deps/lightning/check/skip.tst b/deps/lightning/check/skip.tst new file mode 100644 index 00000000..94eec76f --- /dev/null +++ b/deps/lightning/check/skip.tst @@ -0,0 +1,13 @@ +.data 32 +fmt: +.c "%d\n" +.code + prolog + skip 4 + prepare + pushargi fmt + ellipsis + pushargi 10 + finishi @printf + ret + epilog diff --git a/deps/lightning/check/stack.tst b/deps/lightning/check/stack.tst index e6997193..1ebe4f56 100644 --- a/deps/lightning/check/stack.tst +++ b/deps/lightning/check/stack.tst @@ -55,7 +55,7 @@ fill##T##done: \ #define fill_us fill_s #define fill_ui fill_i -#define ARG( T, N) arg $arg##T##N +#define ARG( T, N) arg##T $arg##T##N #define ARGF( T, N) arg##T $arg##T##N #define ARG1( K, T) ARG##K(T, 0) #define ARG2( K, T) ARG1( K, T) ARG##K(T, 1) @@ -74,56 +74,56 @@ fill##T##done: \ #define ARG15(K, T) ARG14(K, T) ARG##K(T, 14) #define ARG16(K, T) ARG15(K, T) ARG##K(T, 15) #define ARG_c(N) ARG##N( , _c) -#define ARG_uc(N) ARG##N( , _uc) +#define ARG_uc(N) 
ARG##N( , _c) #define ARG_s(N) ARG##N( , _s) -#define ARG_us(N) ARG##N( , _us) +#define ARG_us(N) ARG##N( , _s) #define ARG_i(N) ARG##N( , _i) -#define ARG_ui(N) ARG##N( , _ui) +#define ARG_ui(N) ARG##N( , _i) #define ARG_l(N) ARG##N( , _l) #define ARG_f(N) ARG##N(F, _f) #define ARG_d(N) ARG##N(F, _d) -#define CHK(N, T, V) \ - getarg %r0 $arg##T##V \ +#define CHK(N, T, TT, V) \ + getarg##T %r0 $arg##TT##V \ ldxi##T %r1 %v0 $(V * szof##T) \ beqr N##T##V %r0 %r1 \ calli @abort \ N##T##V: -#define CHKF(N, T, V) \ - getarg##T %f0 $arg##T##V \ +#define CHKF(N, T, TT, V) \ + getarg##T %f0 $arg##TT##V \ ldxi##T %f1 %v0 $(V * szof##T) \ beqr##T N##T##V %f0 %f1 \ calli @abort \ N##T##V: -#define GET1( K, N, T, V) CHK##K(N, T, 0) -#define GET2( K, N, T, V) GET1( K, N, T, V) CHK##K(N, T, 1) -#define GET3( K, N, T, V) GET2( K, N, T, V) CHK##K(N, T, 2) -#define GET4( K, N, T, V) GET3( K, N, T, V) CHK##K(N, T, 3) -#define GET5( K, N, T, V) GET4( K, N, T, V) CHK##K(N, T, 4) -#define GET6( K, N, T, V) GET5( K, N, T, V) CHK##K(N, T, 5) -#define GET7( K, N, T, V) GET6( K, N, T, V) CHK##K(N, T, 6) -#define GET8( K, N, T, V) GET7( K, N, T, V) CHK##K(N, T, 7) -#define GET9( K, N, T, V) GET8( K, N, T, V) CHK##K(N, T, 8) -#define GET10(K, N, T, V) GET9( K, N, T, V) CHK##K(N, T, 9) -#define GET11(K, N, T, V) GET10(K, N, T, V) CHK##K(N, T, 10) -#define GET12(K, N, T, V) GET11(K, N, T, V) CHK##K(N, T, 11) -#define GET13(K, N, T, V) GET12(K, N, T, V) CHK##K(N, T, 12) -#define GET14(K, N, T, V) GET13(K, N, T, V) CHK##K(N, T, 13) -#define GET15(K, N, T, V) GET14(K, N, T, V) CHK##K(N, T, 14) -#define GET16(K, N, T, V) GET15(K, N, T, V) CHK##K(N, T, 15) +#define GET1( K, N, T, TT, V) CHK##K(N, T, TT, 0) +#define GET2( K, N, T, TT, V) GET1( K, N, T, TT, V) CHK##K(N, T, TT, 1) +#define GET3( K, N, T, TT, V) GET2( K, N, T, TT, V) CHK##K(N, T, TT, 2) +#define GET4( K, N, T, TT, V) GET3( K, N, T, TT, V) CHK##K(N, T, TT, 3) +#define GET5( K, N, T, TT, V) GET4( K, N, T, TT, V) CHK##K(N, T, TT, 4) 
+#define GET6( K, N, T, TT, V) GET5( K, N, T, TT, V) CHK##K(N, T, TT, 5) +#define GET7( K, N, T, TT, V) GET6( K, N, T, TT, V) CHK##K(N, T, TT, 6) +#define GET8( K, N, T, TT, V) GET7( K, N, T, TT, V) CHK##K(N, T, TT, 7) +#define GET9( K, N, T, TT, V) GET8( K, N, T, TT, V) CHK##K(N, T, TT, 8) +#define GET10(K, N, T, TT, V) GET9( K, N, T, TT, V) CHK##K(N, T, TT, 9) +#define GET11(K, N, T, TT, V) GET10(K, N, T, TT, V) CHK##K(N, T, TT, 10) +#define GET12(K, N, T, TT, V) GET11(K, N, T, TT, V) CHK##K(N, T, TT, 11) +#define GET13(K, N, T, TT, V) GET12(K, N, T, TT, V) CHK##K(N, T, TT, 12) +#define GET14(K, N, T, TT, V) GET13(K, N, T, TT, V) CHK##K(N, T, TT, 13) +#define GET15(K, N, T, TT, V) GET14(K, N, T, TT, V) CHK##K(N, T, TT, 14) +#define GET16(K, N, T, TT, V) GET15(K, N, T, TT, V) CHK##K(N, T, TT, 15) -#define GET_c(N, M) GET##N( , c##N, _c, M) -#define GET_uc(N, M) GET##N( , uc##N, _uc, M) -#define GET_s(N, M) GET##N( , s##N, _s, M) -#define GET_us(N, M) GET##N( , us##N, _us, M) -#define GET_i(N, M) GET##N( , i##N, _i, M) -#define GET_ui(N, M) GET##N( , ui##N, _ui, M) -#define GET_l(N, M) GET##N( , l##N, _l, M) -#define GET_f(N, M) GET##N(F, f##N, _f, M) -#define GET_d(N, M) GET##N(F, d##N, _d, M) +#define GET_c(N, M) GET##N( , c##N, _c, _c, M) +#define GET_uc(N, M) GET##N( , uc##N, _uc, _c, M) +#define GET_s(N, M) GET##N( , s##N, _s, _s, M) +#define GET_us(N, M) GET##N( , us##N, _us, _s, M) +#define GET_i(N, M) GET##N( , i##N, _i, _i, M) +#define GET_ui(N, M) GET##N( , ui##N, _ui, _i, M) +#define GET_l(N, M) GET##N( , l##N, _l, _l, M) +#define GET_f(N, M) GET##N(F, f##N, _f, _f, M) +#define GET_d(N, M) GET##N(F, d##N, _d, _d, M) -#define PUSH( T, V) pushargi V +#define PUSH( T, V) pushargi##T V #define PUSHF( T, V) pushargi##T V #define PUSH0( K, T) /**/ #define PUSH1( K, T) PUSH##K(T, 0) @@ -161,14 +161,14 @@ test##T##_0: \ ret \ epilog -#define DEFN(N, M, T) \ +#define DEFN(N, M, T, TT) \ name test##T##_##N \ test##T##_##N: \ prolog \ arg $argp \ /* stack buffer in 
%v0 */ \ getarg %v0 $argp \ - ARG##T(N) \ + ARG##TT(N) \ /* validate arguments */ \ GET##T(N, M) \ /* heap buffer in %v1 */ \ @@ -258,24 +258,24 @@ test##T##_17_done: \ ret \ epilog -#define DEF( T) \ +#define DEF( T, TT) \ DEF0( T) \ - DEFN( 1, 0, T) \ - DEFN( 2, 1, T) \ - DEFN( 3, 2, T) \ - DEFN( 4, 3, T) \ - DEFN( 5, 4, T) \ - DEFN( 6, 5, T) \ - DEFN( 7, 6, T) \ - DEFN( 8, 7, T) \ - DEFN( 9, 8, T) \ - DEFN(10, 9, T) \ - DEFN(11, 10, T) \ - DEFN(12, 11, T) \ - DEFN(13, 12, T) \ - DEFN(14, 13, T) \ - DEFN(15, 14, T) \ - DEFN(16, 15, T) \ + DEFN( 1, 0, T, TT) \ + DEFN( 2, 1, T, TT) \ + DEFN( 3, 2, T, TT) \ + DEFN( 4, 3, T, TT) \ + DEFN( 5, 4, T, TT) \ + DEFN( 6, 5, T, TT) \ + DEFN( 7, 6, T, TT) \ + DEFN( 8, 7, T, TT) \ + DEFN( 9, 8, T, TT) \ + DEFN(10, 9, T, TT) \ + DEFN(11, 10, T, TT) \ + DEFN(12, 11, T, TT) \ + DEFN(13, 12, T, TT) \ + DEFN(14, 13, T, TT) \ + DEFN(15, 14, T, TT) \ + DEFN(16, 15, T, TT) \ DEFX(T) #define CALL(T) calli test##T##_17 @@ -321,17 +321,17 @@ memcpy_done: FILLF(_f) FILLF(_d) - DEF(_c) - DEF(_uc) - DEF(_s) - DEF(_us) - DEF(_i) + DEF(_c, _c) + DEF(_uc, _c) + DEF(_s, _s) + DEF(_us, _s) + DEF(_i, _i) #if __WORDSIZE == 64 - DEF(_ui) - DEF(_l) + DEF(_ui, _i) + DEF(_l, _l) #endif - DEF(_f) - DEF(_d) + DEF(_f, _f) + DEF(_d, _d) name main main: diff --git a/deps/lightning/configure.ac b/deps/lightning/configure.ac index 39d22091..3fb09e79 100644 --- a/deps/lightning/configure.ac +++ b/deps/lightning/configure.ac @@ -1,5 +1,5 @@ dnl -dnl Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc. +dnl Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc. dnl dnl This file is part of GNU lightning. dnl @@ -15,7 +15,7 @@ dnl License for more details. 
dnl AC_PREREQ([2.71]) -AC_INIT([GNU lightning],[2.1.3],[pcpa@gnu.org],[lightning]) +AC_INIT([GNU lightning],[2.2.1],[pcpa@gnu.org],[lightning]) AC_CONFIG_AUX_DIR([build-aux]) AC_CANONICAL_TARGET AC_CONFIG_SRCDIR([Makefile.am]) @@ -60,6 +60,12 @@ case "$target_cpu" in fi ;; *) ;; esac ;; + aarch64) + case "$host_os" in + darwin*) + LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DPACKED_STACK=1" ;; + *) ;; + esac ;; *) ;; esac @@ -163,29 +169,43 @@ if test "x$DEVEL_DISASSEMBLER" != "xno"; then LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEVEL_DISASSEMBLER=1" fi +# This option is only useful during development. +AC_ARG_ENABLE(devel-get-jit-size, + AS_HELP_STRING([--enable-devel-get-jit-size], + [Devel mode to regenerate jit size information]), + [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no]) +AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes]) + AC_ARG_ENABLE(assertions, AS_HELP_STRING([--enable-assertions], [Enable runtime code generation assertions]), [DEBUG=$enableval], [DEBUG=auto]) -if test "x$DEBUG" = xyes; then + +# This option might be made default in the future +# Currently it is only useful to ensure existing code will work +# if PACKED_STACK is also defined. +AC_ARG_ENABLE(devel-strong-type-checking, + AS_HELP_STRING([--enable-devel-strong-type-checking], + [Devel mode for strong type checking]), + [STRONG_TYPE_CHECKING=$enableval], [STRONG_TYPE_CHECKING=no]) +if test "x$DEBUG" = xyes -o x"$STRONG_TYPE_CHECKING" = xyes; then LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1" else LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG" DEBUG=no fi +AM_CONDITIONAL(strong_type_checking, [test $STRONG_TYPE_CHECKING = yes]) -# This option is only useful during development. 
-AC_ARG_ENABLE(devel-get-jit-size, - AS_HELP_STRING([--enable-devel-get-jit-size], - [Devel mode to regenerate jit size information]), - [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no]) -AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes]) +AC_CHECK_LIB(dl, dlopen, [HAVE_LIBDL="yes"]) +AC_CHECK_LIB(dld, dlopen, [HAVE_LIBDLD="yes"]) -case "$host_os" in - *bsd*|osf*) SHLIB="" ;; - *hpux*) SHLIB="-ldld" ;; - *) SHLIB="-ldl" ;; -esac +if test "x$HAVE_LIBDL" = xyes; then + SHLIB="-ldl"; +elif test "x$HAVE_LIBDLD" = xyes; then + SHLIB="-ldld"; +else + SHLIB=""; +fi AC_SUBST(SHLIB) cpu= @@ -233,7 +253,7 @@ elif test $cpu = x86; then int main(void) { int ac, flags; unsigned int eax, ebx, ecx, edx; - if (__WORDSIZE == 64) + if (sizeof(long) == 8) return 1; __asm__ volatile ("pushfl;\n\t" "popl %0;\n\t" diff --git a/deps/lightning/doc/Makefile.am b/deps/lightning/doc/Makefile.am index 6398bceb..4cec67ea 100644 --- a/deps/lightning/doc/Makefile.am +++ b/deps/lightning/doc/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2012-2022 Free Software Foundation, Inc. +# Copyright 2012-2023 Free Software Foundation, Inc. # # This file is part of GNU lightning. # @@ -14,7 +14,8 @@ # License for more details. 
# -AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE +AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include \ + -D_GNU_SOURCE $(LIGHTNING_CFLAGS) info_TEXINFOS = lightning.texi MOSTLYCLEANFILES = lightning.tmp diff --git a/deps/lightning/doc/body.texi b/deps/lightning/doc/body.texi index 1d8d2777..1bd3f675 100644 --- a/deps/lightning/doc/body.texi +++ b/deps/lightning/doc/body.texi @@ -101,17 +101,30 @@ the @file{configure} shell script; to run it, merely type: ./configure @end example -@lightning{} supports the @code{--enable-disassembler} option, that -enables linking to GNU binutils and optionally print human readable +The @file{configure} accepts the @code{--enable-disassembler} option, +that enables linking to GNU binutils and optionally print human readable disassembly of the jit code. This option can be disabled by the @code{--disable-disassembler} option. -Another option that @file{configure} accepts is -@code{--enable-assertions}, which enables several consistency checks in -the run-time assemblers. These are not usually needed, so you can -decide to simply forget about it; also remember that these consistency +@file{configure} also accepts the @code{--enable-devel-disassembler}, +option useful to check exactly what machine instructions were generated +for a @lightning{} instruction. Basically mixing @code{jit_print} and +@code{jit_disassembly}. + +The @code{--enable-assertions} option, which enables several consistency +checks in the run-time assemblers. These are not usually needed, so you +can decide to simply forget about it; also remember that these consistency checks tend to slow down your code generator. +The @code{--enable-devel-strong-type-checking} option that does extra type +checking using @code{assert}. This option also enables the +@code{--enable-assertions} unless it is explicitly disabled. + +The option @code{--enable-devel-get-jit-size} should only be used +when doing updates or maintenance to lightning.
It regenerates the +@code{jit_$ARCH-sz.c} creating a table of maximum bytes usage when +translating a @lightning{} instruction to machine code. + After you've configured @lightning{}, run @file{make} as usual. @lightning{} has an extensive set of tests to validate it is working @@ -278,12 +291,27 @@ These accept two operands, both of which must be registers. @example negr _f _d O1 = -O2 comr O1 = ~O2 +clor O1 = number of leading one bits +clzr O1 = number of leading zero bits +ctor O1 = number of trailing one bits +ctzr O1 = number of trailing zero bits @end example +Note that @code{ctzr} is basically the equivalent of a @code{C} call +@code{ffs} but indexed at bit zero, not one. + +Contrary to @code{__builtin_ctz} and @code{__builtin_clz}, an input +value of zero is not an error, it just returns the number of bits +in a word, 64 if @lightning{} generates 64 bit instructions, otherwise +it returns 32. + +The @code{clor} and @code{ctor} are just counterparts of the versions +that search for zero bits. + These unary ALU operations are only defined for float operands. @example absr _f _d O1 = fabs(O2) -sqrtr O1 = sqrt(O2) +sqrtr _f _d O1 = sqrt(O2) @end example Besides requiring the @code{r} modifier, there are no unary operations @@ -401,31 +429,33 @@ ldxi _c _uc _s _us _i _ui _l _f _d O1 = *(O2+O3)
@example -str _c _uc _s _us _i _ui _l _f _d *O1 = O2 -sti _c _uc _s _us _i _ui _l _f _d *O1 = O2 -stxr _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3 -stxi _c _uc _s _us _i _ui _l _f _d *(O1+O2) = O3 +str _c _s _i _l _f _d *O1 = O2 +sti _c _s _i _l _f _d *O1 = O2 +stxr _c _s _i _l _f _d *(O1+O2) = O3 +stxi _c _s _i _l _f _d *(O1+O2) = O3 @end example -As for the load operations, the @code{_ui} and @code{_l} types are -only available in 64-bit architectures, and for convenience, there -is a version without a type modifier for integer or pointer operands -that uses the appropriate wordsize call. +Note that the unsigned type modifier is not available, as the store +only writes to the 1, 2, 4 or 8 sized memory address. +The @code{_l} type is only available in 64-bit architectures, and for +convenience, there is a version without a type modifier for integer or +pointer operands that uses the appropriate wordsize call. @item Argument management These are: @example prepare (not specified) va_start (not specified) -pushargr _f _d -pushargi _f _d +pushargr _c _uc _s _us _i _ui _l _f _d +pushargi _c _uc _s _us _i _ui _l _f _d va_push (not specified) -arg _f _d +arg _c _uc _s _us _i _ui _l _f _d getarg _c _uc _s _us _i _ui _l _f _d va_arg _d -putargr _f _d -putargi _f _d +putargr _c _uc _s _us _i _ui _l _f _d +putargi _c _uc _s _us _i _ui _l _f _d ret (not specified) -retr _f _d +retr _c _uc _s _us _i _ui _l _f _d +reti _c _uc _s _us _i _ui _l _f _d reti _f _d va_end (not specified) retval _c _uc _s _us _i _ui _l _f _d @@ -444,6 +474,15 @@ the @code{pushargr} or @code{pushargi} to push the arguments @strong{in left to right order}; and use @code{finish} or @code{call} (explained below) to perform the actual call. +Note that @code{arg}, @code{pusharg}, @code{putarg} and @code{ret} when +handling integer types can be used without a type modifier. 
+It is suggested to use matching type modifiers to @code{arg}, @code{putarg} +and @code{getarg} otherwise problems will happen if generating jit for +environments that require arguments to be truncated and zero or sign +extended by the caller and/or excess arguments might be passed packed +in the stack. Currently only Apple systems with @code{aarch64} cpus are +known to have this restriction. + @code{va_start} returns a @code{C} compatible @code{va_list}. To fetch arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles. @code{va_push} is required when passing a @code{va_list} to another function, @@ -565,6 +604,10 @@ bxsubr _u O2 -= O3@r{, goto }O1@r{ if no overflow} bxsubi _u O2 -= O3@r{, goto }O1@r{ if no overflow} @end example +Note that the @code{C} code does not have an @code{O1} argument. It is +required to always use the return value as an argument to @code{patch}, +@code{patch_at} or @code{patch_abs}. + @item Jump and return operations These accept one argument except @code{ret} and @code{jmpi} which have none; the difference between @code{finishi} and @code{calli} @@ -603,6 +646,14 @@ the next instruction, usually with a label: align (not specified) @r{align code} @end example +Similar to @code{align} is the next instruction, also usually used with +a label: +@example +skip (not specified) @r{skip code} +@end example +It is used to specify a minimal number of bytes of nops to be inserted +before the next instruction. + @code{label} is normally used as @code{patch_at} argument for backward jumps. @@ -687,6 +738,10 @@ label2 = jit_indirect(); @rem{/* second entry point */} assert(addr2 - addr1 == 16); @rem{/* only one of the addresses needs to be remembered */} @end example +@code{skip} is useful for reserving space in the code buffer that can +later be filled (possibly with the help of the pair of functions +@code{jit_unprotect} and @code{jit_protect}). + @item Function prolog These macros are used to set up a function prolog. 
The @code{allocai} @@ -919,7 +974,7 @@ will return non zero if the argument lives in a register. This call is useful to know the live range of register arguments, as those are very fast to read and write, but have volatile values. -@code{callee_save_p} exects a valid @code{JIT_Rn}, @code{JIT_Vn}, or +@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or @code{JIT_Fn}, and will return non zero if the register is callee save. This call is useful because on several ports, the @code{JIT_Rn} and @code{JIT_Fn} registers are actually callee save; no need @@ -1144,26 +1199,13 @@ maps to @code{%g2} on the SPARC). @table @b @item x86_64 @example - sub $0x30,%rsp - mov %rbp,(%rsp) - mov %rsp,%rbp - sub $0x18,%rsp - mov %rdi,%rax mov %rdi, %rax - add $0x1,%rax inc %rax - mov %rbp,%rsp - mov (%rsp),%rbp - add $0x30,%rsp - retq retq + mov %rdi,%rax + add $0x1,%rax + ret @end example -In this case, the main overhead is due to the function's prolog and -epilog, and stack alignment after reserving stack space for word -to/from float conversions or moving data from/to x87 to/from SSE. -Note that besides allocating space to save callee saved registers, -no registers are saved/restored because @lightning{} notices those -registers are not modified. There is currently no logic to detect -if it needs to allocate stack space for type conversions neither -proper leaf function detection, but these are subject to change -(FIXME). +In this case, for the x86 port, @lightning{} has simple optimizations +to understand it is a leaf function, and that it is not required to +create a stack frame nor update the stack pointer. @end table @node printf @@ -1327,7 +1369,7 @@ jit_node_t *compile_rpn(char *expr) in = jit_arg(); stack_ptr = stack_base = jit_allocai (32 * sizeof (int)); - jit_getarg_i(JIT_R2, in); + jit_getarg(JIT_R2, in); while (*expr) @{ char buf[32]; @@ -1680,6 +1722,28 @@ Get the current memory allocation function. 
Also, unlike the GNU GMP counterpart, it is an error to pass @code{NULL} pointers as arguments. @end deftypefun +@section Protection +Unless an alternate code buffer is used (see below), @code{jit_emit} +sets the access protections so that the code buffer's memory can be read and +executed, but not modified. One can use the following functions after +@code{jit_emit} but before @code{jit_clear_state} to temporarily lift the +protection: + +@deftypefun void jit_unprotect () +Changes the access protection so that the code buffer's memory can be read and +modified. Before the emitted code can be invoked, @code{jit_protect} +has to be called to reset the change. + +This procedure has no effect when an alternate code buffer (see below) is used. +@end deftypefun + +@deftypefun void jit_protect () +Changes the access protection so that the code buffer's memory can be read and +executed. + +This procedure has no effect when an alternate code buffer (see below) is used. +@end deftypefun + @section Alternate code buffer To instruct @lightning{} to use an alternate code buffer it is required to call @code{jit_realize} before @code{jit_emit}, and then query states diff --git a/deps/lightning/doc/rpn.c b/deps/lightning/doc/rpn.c index 81314848..edb3d8e7 100644 --- a/deps/lightning/doc/rpn.c +++ b/deps/lightning/doc/rpn.c @@ -24,7 +24,7 @@ jit_node_t *compile_rpn(char *expr) fn = jit_note(NULL, 0); jit_prolog(); - in = jit_arg(); + in = jit_arg_i(); stack_ptr = stack_base = jit_allocai (32 * sizeof (int)); jit_getarg_i(JIT_R2, in); diff --git a/deps/lightning/include/Makefile.am b/deps/lightning/include/Makefile.am index ce622e20..bd487775 100644 --- a/deps/lightning/include/Makefile.am +++ b/deps/lightning/include/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc. # # This file is part of GNU lightning.
# diff --git a/deps/lightning/include/lightning.h.in b/deps/lightning/include/lightning.h.in index 67c6af15..7aa654cb 100644 --- a/deps/lightning/include/lightning.h.in +++ b/deps/lightning/include/lightning.h.in @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -190,6 +190,8 @@ typedef enum { #define jit_align(u) jit_new_node_w(jit_code_align, u) jit_code_live, jit_code_align, jit_code_save, jit_code_load, +#define jit_skip(u) jit_new_node_w(jit_code_skip, u) + jit_code_skip, #define jit_name(u) _jit_name(_jit,u) jit_code_name, #define jit_note(u, v) _jit_note(_jit, u, v) @@ -210,27 +212,80 @@ typedef enum { #define jit_allocar(u, v) _jit_allocar(_jit,u,v) jit_code_allocai, jit_code_allocar, -#define jit_arg() _jit_arg(_jit) - jit_code_arg, +#define jit_arg_c() _jit_arg(_jit, jit_code_arg_c) +#define jit_arg_s() _jit_arg(_jit, jit_code_arg_s) +#define jit_arg_i() _jit_arg(_jit, jit_code_arg_i) +# if __WORDSIZE == 32 +# define jit_arg() jit_arg_i() +#else +# define jit_arg_l() _jit_arg(_jit, jit_code_arg_l) +# define jit_arg() jit_arg_l() +#endif + jit_code_arg_c, jit_code_arg_s, + jit_code_arg_i, jit_code_arg_l, +#if __WORDSIZE == 32 +# define jit_code_arg jit_code_arg_i +#else +# define jit_code_arg jit_code_arg_l +#endif + #define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v) #define jit_getarg_uc(u,v) _jit_getarg_uc(_jit,u,v) - jit_code_getarg_c, jit_code_getarg_uc, #define jit_getarg_s(u,v) _jit_getarg_s(_jit,u,v) #define jit_getarg_us(u,v) _jit_getarg_us(_jit,u,v) - jit_code_getarg_s, jit_code_getarg_us, #define jit_getarg_i(u,v) _jit_getarg_i(_jit,u,v) #if __WORDSIZE == 32 # define jit_getarg(u,v) jit_getarg_i(u,v) #else -# define jit_getarg(u,v) jit_getarg_l(u,v) # define jit_getarg_ui(u,v) _jit_getarg_ui(_jit,u,v) # define jit_getarg_l(u,v) _jit_getarg_l(_jit,u,v) +# define jit_getarg(u,v) jit_getarg_l(u,v) #endif + 
jit_code_getarg_c, jit_code_getarg_uc, + jit_code_getarg_s, jit_code_getarg_us, jit_code_getarg_i, jit_code_getarg_ui, jit_code_getarg_l, -# define jit_putargr(u,v) _jit_putargr(_jit,u,v) -# define jit_putargi(u,v) _jit_putargi(_jit,u,v) - jit_code_putargr, jit_code_putargi, +#if __WORDSIZE == 32 +# define jit_code_getarg jit_code_getarg_i +#else +# define jit_code_getarg jit_code_getarg_l +#endif + +#define jit_putargr_c(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_c) +#define jit_putargi_c(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_c) +#define jit_putargr_uc(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_uc) +#define jit_putargi_uc(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_uc) +#define jit_putargr_s(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_s) +#define jit_putargi_s(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_s) +#define jit_putargr_us(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_us) +#define jit_putargi_us(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_us) +#define jit_putargr_i(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_i) +#define jit_putargi_i(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_i) +#if __WORDSIZE == 32 +# define jit_putargr(u,v) jit_putargr_i(u,v) +# define jit_putargi(u,v) jit_putargi_i(u,v) +#else +# define jit_putargr_ui(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_ui) +# define jit_putargi_ui(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_ui) +# define jit_putargr_l(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_l) +# define jit_putargi_l(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_l) +# define jit_putargr(u,v) jit_putargr_l(u,v) +# define jit_putargi(u,v) jit_putargi_l(u,v) +#endif + jit_code_putargr_c, jit_code_putargi_c, + jit_code_putargr_uc, jit_code_putargi_uc, + jit_code_putargr_s, jit_code_putargi_s, + jit_code_putargr_us, jit_code_putargi_us, + jit_code_putargr_i, jit_code_putargi_i, + jit_code_putargr_ui, jit_code_putargi_ui, + jit_code_putargr_l, jit_code_putargi_l, +#if __WORDSIZE == 32 +# define jit_code_putargr jit_code_putargr_i +# define 
jit_code_putargi jit_code_putargi_i +#else +# define jit_code_putargr jit_code_putargr_l +# define jit_code_putargi jit_code_putargi_l +#endif #define jit_va_start(u) jit_new_node_w(jit_code_va_start, u) jit_code_va_start, @@ -352,6 +407,10 @@ typedef enum { #define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) jit_code_movnr, jit_code_movzr, + jit_code_casr, jit_code_casi, +#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) +#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) + #define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v) #define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v) jit_code_extr_c, jit_code_extr_uc, @@ -364,6 +423,18 @@ typedef enum { #endif jit_code_extr_i, jit_code_extr_ui, +#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) + jit_code_bswapr_us, +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) + jit_code_bswapr_ui, +#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) + jit_code_bswapr_ul, +#if __WORDSIZE == 32 +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#else +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) +#endif + #define jit_htonr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) #define jit_ntohr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) jit_code_htonr_us, @@ -550,33 +621,106 @@ typedef enum { #define jit_prepare() _jit_prepare(_jit) jit_code_prepare, -#define jit_pushargr(u) _jit_pushargr(_jit,u) -#define jit_pushargi(u) _jit_pushargi(_jit,u) - jit_code_pushargr, jit_code_pushargi, + +#define jit_pushargr_c(u) _jit_pushargr(_jit,u,jit_code_pushargr_c) +#define jit_pushargi_c(u) _jit_pushargi(_jit,u,jit_code_pushargi_c) +#define jit_pushargr_uc(u) _jit_pushargr(_jit,u,jit_code_pushargr_uc) +#define jit_pushargi_uc(u) _jit_pushargi(_jit,u,jit_code_pushargi_uc) +#define jit_pushargr_s(u) _jit_pushargr(_jit,u,jit_code_pushargr_s) +#define jit_pushargi_s(u) 
_jit_pushargi(_jit,u,jit_code_pushargi_s) +#define jit_pushargr_us(u) _jit_pushargr(_jit,u,jit_code_pushargr_us) +#define jit_pushargi_us(u) _jit_pushargi(_jit,u,jit_code_pushargi_us) +#define jit_pushargr_i(u) _jit_pushargr(_jit,u,jit_code_pushargr_i) +#define jit_pushargi_i(u) _jit_pushargi(_jit,u,jit_code_pushargi_i) +#if __WORDSIZE == 32 +# define jit_pushargr(u) jit_pushargr_i(u) +# define jit_pushargi(u) jit_pushargi_i(u) +#else +# define jit_pushargr_ui(u) _jit_pushargr(_jit,u,jit_code_pushargr_ui) +# define jit_pushargi_ui(u) _jit_pushargi(_jit,u,jit_code_pushargi_ui) +# define jit_pushargr_l(u) _jit_pushargr(_jit,u,jit_code_pushargr_l) +# define jit_pushargi_l(u) _jit_pushargi(_jit,u,jit_code_pushargi_l) +# define jit_pushargr(u) jit_pushargr_l(u) +# define jit_pushargi(u) jit_pushargi_l(u) +#endif + jit_code_pushargr_c, jit_code_pushargi_c, + jit_code_pushargr_uc, jit_code_pushargi_uc, + jit_code_pushargr_s, jit_code_pushargi_s, + jit_code_pushargr_us, jit_code_pushargi_us, + jit_code_pushargr_i, jit_code_pushargi_i, + jit_code_pushargr_ui, jit_code_pushargi_ui, + jit_code_pushargr_l, jit_code_pushargi_l, +#if __WORDSIZE == 32 +# define jit_code_pushargr jit_code_pushargr_i +# define jit_code_pushargi jit_code_pushargi_i +#else +# define jit_code_pushargr jit_code_pushargr_l +# define jit_code_pushargi jit_code_pushargi_l +#endif + #define jit_finishr(u) _jit_finishr(_jit,u) #define jit_finishi(u) _jit_finishi(_jit,u) jit_code_finishr, jit_code_finishi, #define jit_ret() _jit_ret(_jit) jit_code_ret, -#define jit_retr(u) _jit_retr(_jit,u) -#define jit_reti(u) _jit_reti(_jit,u) - jit_code_retr, jit_code_reti, + +#define jit_retr_c(u) _jit_retr(_jit,u,jit_code_retr_c) +#define jit_reti_c(u) _jit_reti(_jit,u,jit_code_reti_c) +#define jit_retr_uc(u) _jit_retr(_jit,u,jit_code_retr_uc) +#define jit_reti_uc(u) _jit_reti(_jit,u,jit_code_reti_uc) +#define jit_retr_s(u) _jit_retr(_jit,u,jit_code_retr_s) +#define jit_reti_s(u) _jit_reti(_jit,u,jit_code_reti_s) 
+#define jit_retr_us(u) _jit_retr(_jit,u,jit_code_retr_us) +#define jit_reti_us(u) _jit_reti(_jit,u,jit_code_reti_us) +#define jit_retr_i(u) _jit_retr(_jit,u,jit_code_retr_i) +#define jit_reti_i(u) _jit_reti(_jit,u,jit_code_reti_i) +#if __WORDSIZE == 32 +# define jit_retr(u) jit_retr_i(u) +# define jit_reti(u) jit_reti_i(u) +#else +# define jit_retr_ui(u) _jit_retr(_jit,u,jit_code_retr_ui) +# define jit_reti_ui(u) _jit_reti(_jit,u,jit_code_reti_ui) +# define jit_retr_l(u) _jit_retr(_jit,u,jit_code_retr_l) +# define jit_reti_l(u) _jit_reti(_jit,u,jit_code_reti_l) +# define jit_retr(u) jit_retr_l(u) +# define jit_reti(u) jit_reti_l(u) +#endif + jit_code_retr_c, jit_code_reti_c, + jit_code_retr_uc, jit_code_reti_uc, + jit_code_retr_s, jit_code_reti_s, + jit_code_retr_us, jit_code_reti_us, + jit_code_retr_i, jit_code_reti_i, + jit_code_retr_ui, jit_code_reti_ui, + jit_code_retr_l, jit_code_reti_l, +#if __WORDSIZE == 32 +# define jit_code_retr jit_code_retr_i +# define jit_code_reti jit_code_reti_i +#else +# define jit_code_retr jit_code_retr_l +# define jit_code_reti jit_code_reti_l +#endif + #define jit_retval_c(u) _jit_retval_c(_jit,u) #define jit_retval_uc(u) _jit_retval_uc(_jit,u) - jit_code_retval_c, jit_code_retval_uc, #define jit_retval_s(u) _jit_retval_s(_jit,u) #define jit_retval_us(u) _jit_retval_us(_jit,u) - jit_code_retval_s, jit_code_retval_us, #define jit_retval_i(u) _jit_retval_i(_jit,u) #if __WORDSIZE == 32 # define jit_retval(u) jit_retval_i(u) #else -# define jit_retval(u) jit_retval_l(u) # define jit_retval_ui(u) _jit_retval_ui(_jit,u) # define jit_retval_l(u) _jit_retval_l(_jit,u) +# define jit_retval(u) jit_retval_l(u) #endif + jit_code_retval_c, jit_code_retval_uc, + jit_code_retval_s, jit_code_retval_us, jit_code_retval_i, jit_code_retval_ui, jit_code_retval_l, +#if __WORDSIZE == 32 +# define jit_code_retval jit_code_retval_i +#else +# define jit_code_retval jit_code_retval_l +#endif #define jit_epilog() _jit_epilog(_jit) jit_code_epilog, @@ 
-904,21 +1048,13 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) -#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) - jit_code_bswapr_us, -#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) - jit_code_bswapr_ui, -#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) - jit_code_bswapr_ul, -#if __WORDSIZE == 32 -#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) -#else -#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) -#endif +#define jit_clor(u,v) jit_new_node_ww(jit_code_clor,u,v) +#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v) + jit_code_clor, jit_code_clzr, - jit_code_casr, jit_code_casi, -#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) -#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) +#define jit_ctor(u,v) jit_new_node_ww(jit_code_ctor,u,v) +#define jit_ctzr(u,v) jit_new_node_ww(jit_code_ctzr,u,v) + jit_code_ctor, jit_code_ctzr, jit_code_last_code } jit_code_t; @@ -960,7 +1096,8 @@ extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t); extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t); extern void _jit_ellipsis(jit_state_t*); -extern jit_node_t *_jit_arg(jit_state_t*); +extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t); + extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*); @@ -970,19 +1107,24 @@ extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*); #endif -extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*); -extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*); + +extern void 
_jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t); +extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t); extern void _jit_prepare(jit_state_t*); extern void _jit_ellipsis(jit_state_t*); extern void _jit_va_push(jit_state_t*, jit_gpr_t); -extern void _jit_pushargr(jit_state_t*, jit_gpr_t); -extern void _jit_pushargi(jit_state_t*, jit_word_t); + +extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t); + extern void _jit_finishr(jit_state_t*, jit_gpr_t); extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t); extern void _jit_ret(jit_state_t*); -extern void _jit_retr(jit_state_t*, jit_gpr_t); -extern void _jit_reti(jit_state_t*, jit_word_t); + +extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t); + extern void _jit_retval_c(jit_state_t*, jit_gpr_t); extern void _jit_retval_uc(jit_state_t*, jit_gpr_t); extern void _jit_retval_s(jit_state_t*, jit_gpr_t); @@ -992,6 +1134,7 @@ extern void _jit_retval_i(jit_state_t*, jit_gpr_t); extern void _jit_retval_ui(jit_state_t*, jit_gpr_t); extern void _jit_retval_l(jit_state_t*, jit_gpr_t); #endif + extern void _jit_epilog(jit_state_t*); #define jit_patch(u) _jit_patch(_jit,u) @@ -1016,6 +1159,10 @@ extern void _jit_frame(jit_state_t*, jit_int32_t); extern void _jit_tramp(jit_state_t*, jit_int32_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); +#define jit_unprotect() _jit_unprotect(_jit) +extern void _jit_unprotect(jit_state_t*); +#define jit_protect() _jit_protect(_jit) +extern void _jit_protect(jit_state_t*); #define jit_print() _jit_print(_jit) extern void _jit_print(jit_state_t*); diff --git a/deps/lightning/include/lightning/jit_aarch64.h b/deps/lightning/include/lightning/jit_aarch64.h index 6a435f1a..30864998 100644 --- a/deps/lightning/include/lightning/jit_aarch64.h +++ 
b/deps/lightning/include/lightning/jit_aarch64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -23,6 +23,10 @@ #define JIT_HASH_CONSTS 0 #define JIT_NUM_OPERANDS 3 +#if __APPLE__ +# define PACKED_STACK 1 +#endif + /* * Types */ diff --git a/deps/lightning/include/lightning/jit_alpha.h b/deps/lightning/include/lightning/jit_alpha.h index 35934319..7986c34e 100644 --- a/deps/lightning/include/lightning/jit_alpha.h +++ b/deps/lightning/include/lightning/jit_alpha.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2022 Free Software Foundation, Inc. + * Copyright (C) 2014-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_arm.h b/deps/lightning/include/lightning/jit_arm.h index 8f7278db..0ed95351 100644 --- a/deps/lightning/include/lightning/jit_arm.h +++ b/deps/lightning/include/lightning/jit_arm.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -106,6 +106,9 @@ typedef enum { typedef struct { jit_uint32_t version : 4; + /* this field originally was only used for the 'e' in armv5te. + * it can also be used to force hardware division, if setting + * version to 7, telling it is armv7r or better. */ jit_uint32_t extend : 1; /* only generate thumb instructions for thumb2 */ jit_uint32_t thumb : 1; @@ -117,6 +120,12 @@ typedef struct { * due to some memory ordering constraint not being respected, so, * disable by default */ jit_uint32_t ldrt_strt : 1; + /* assume functions called never match jit instruction set? + * that is libc, gmp, mpfr, etc functions are in thumb mode and jit + * is in arm mode, or the reverse, what may cause a crash upon return + * of that function if generating jit for a relative jump. 
+ */ + jit_uint32_t exchange : 1; } jit_cpu_t; /* diff --git a/deps/lightning/include/lightning/jit_hppa.h b/deps/lightning/include/lightning/jit_hppa.h index afdf21da..df361baa 100644 --- a/deps/lightning/include/lightning/jit_hppa.h +++ b/deps/lightning/include/lightning/jit_hppa.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_ia64.h b/deps/lightning/include/lightning/jit_ia64.h index 7b212b9a..e45818aa 100644 --- a/deps/lightning/include/lightning/jit_ia64.h +++ b/deps/lightning/include/lightning/jit_ia64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -117,4 +117,13 @@ typedef enum { _NOREG, } jit_reg_t; +typedef struct { + jit_uint32_t clz : 1; +} jit_cpu_t; + +/* + * Initialization + */ +extern jit_cpu_t jit_cpu; + #endif /* _jit_ia64_h */ diff --git a/deps/lightning/include/lightning/jit_loongarch.h b/deps/lightning/include/lightning/jit_loongarch.h index 44982ecc..89b1a862 100644 --- a/deps/lightning/include/lightning/jit_loongarch.h +++ b/deps/lightning/include/lightning/jit_loongarch.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Free Software Foundation, Inc. + * Copyright (C) 2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/include/lightning/jit_mips.h b/deps/lightning/include/lightning/jit_mips.h index a2388c9c..52aebccb 100644 --- a/deps/lightning/include/lightning/jit_mips.h +++ b/deps/lightning/include/lightning/jit_mips.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -25,6 +25,8 @@ #if _MIPS_SIM != _ABIO32 # define NEW_ABI 1 +#else +# define NEW_ABI 0 #endif /* @@ -114,4 +116,13 @@ typedef enum { _NOREG, } jit_reg_t; +typedef struct { + jit_uint32_t release : 4; +} jit_cpu_t; + +/* + * Initialization + */ +extern jit_cpu_t jit_cpu; + #endif /* _jit_mips_h */ diff --git a/deps/lightning/include/lightning/jit_ppc.h b/deps/lightning/include/lightning/jit_ppc.h index d3d25d39..460c491a 100644 --- a/deps/lightning/include/lightning/jit_ppc.h +++ b/deps/lightning/include/lightning/jit_ppc.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -22,6 +22,9 @@ #define JIT_HASH_CONSTS 1 #define JIT_NUM_OPERANDS 3 +#if defined(_AIX) && !defined(_CALL_AIX) && !defined(_CALL_LINUX) +# define _CALL_AIXDESC 1 +#endif /* * Types diff --git a/deps/lightning/include/lightning/jit_private.h b/deps/lightning/include/lightning/jit_private.h index d0420b8b..444a2953 100644 --- a/deps/lightning/include/lightning/jit_private.h +++ b/deps/lightning/include/lightning/jit_private.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -55,6 +55,26 @@ # define HIDDEN /**/ #endif +#if PACKED_STACK || STRONG_TYPE_CHECKING +# define assert_arg_type(code, expect) \ + do assert((code) == (expect)); while (0) +# define assert_putarg_type(code, expect) \ + do \ + assert((((code) - jit_code_putargr_c) >> 2) == \ + ((expect) - jit_code_arg_c)); \ + while (0) +#else +# define assert_arg_type(code, expect) \ + do assert((int)(code) == (int)(expect) || \ + (code) == jit_code_arg); while (0) +# define assert_putarg_type(code, expect) \ + do \ + assert(((((code) - jit_code_putargr_c) >> 2) == \ + ((expect) - jit_code_arg_c)) || \ + ((code) == jit_code_arg)); \ + while (0) +#endif + #define rc(value) jit_class_##value #define rn(reg) (jit_regno(_rvs[jit_regno(reg)].spec)) @@ -174,48 +194,80 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, (!jit_regset_tstbit(&_jitc->regarg, regno) && \ !jit_regset_tstbit(&_jitc->regsav, regno)) -#define jit_inc_synth(code) \ +#define jit_code_inc_synth(code) \ do { \ - (void)jit_new_node(jit_code_##code); \ + (void)jit_new_node(code); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_w(code, u) \ +#define jit_inc_synth(name) \ + jit_code_inc_synth(jit_code_##name) +#define jit_code_inc_synth_w(code, u) \ do { \ - (void)jit_new_node_w(jit_code_##code, u); \ + (void)jit_new_node_w(code, u); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_f(code, u) \ +#define jit_inc_synth_w(name, u) \ + jit_code_inc_synth_w(jit_code_##name, u) +#define jit_code_inc_synth_f(code, u) \ do { \ - (void)jit_new_node_f(jit_code_##code, u); \ + (void)jit_new_node_f(code, u); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_d(code, u) \ +#define jit_inc_synth_f(name, u) \ + jit_code_inc_synth_f(jit_code_##name, u) +#define jit_code_inc_synth_d(code, u) \ do { \ - (void)jit_new_node_d(jit_code_##code, u); \ + (void)jit_new_node_d(code, u); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_ww(code, u, v) \ +#define jit_inc_synth_d(name, u) \ + 
jit_code_inc_synth_d(jit_code_##name, u) +#define jit_code_inc_synth_ww(code, u, v) \ do { \ - (void)jit_new_node_ww(jit_code_##code, u, v); \ + (void)jit_new_node_ww(code, u, v); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_wp(code, u, v) \ +#define jit_inc_synth_ww(name, u, v) \ + jit_code_inc_synth_ww(jit_code_##name, u, v) +#define jit_code_inc_synth_wp(code, u, v) \ do { \ - (void)jit_new_node_wp(jit_code_##code, u, v); \ + (void)jit_new_node_wp(code, u, v); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_fp(code, u, v) \ +#define jit_inc_synth_wp(name, u, v) \ + jit_code_inc_synth_wp(jit_code_##name, u, v) +#define jit_code_inc_synth_fp(code, u, v) \ do { \ - (void)jit_new_node_fp(jit_code_##code, u, v); \ + (void)jit_new_node_fp(code, u, v); \ jit_synth_inc(); \ } while (0) -#define jit_inc_synth_dp(code, u, v) \ +#define jit_inc_synth_fp(name, u, v) \ + jit_code_inc_synth_fp(jit_code_##name, u, v) +#define jit_code_inc_synth_dp(code, u, v) \ do { \ - (void)jit_new_node_dp(jit_code_##code, u, v); \ + (void)jit_new_node_dp(code, u, v); \ jit_synth_inc(); \ } while (0) +#define jit_inc_synth_dp(name, u, v) \ + jit_code_inc_synth_dp(jit_code_##name, u, v) #define jit_dec_synth() jit_synth_dec() +#define jit_link_alist(node) \ + do { \ + node->link = _jitc->function->alist; \ + _jitc->function->alist = node; \ + } while (0) +#define jit_check_frame() \ + do { \ + if (!_jitc->function->need_frame) { \ + _jitc->again = 1; \ + _jitc->function->need_frame = 1; \ + } \ + } while (0) +#define jit_diffsize() (stack_framesize - _jitc->framesize) +#define jit_framesize() (stack_framesize - jit_diffsize()) +#define jit_selfsize() (_jitc->function->self.size - jit_diffsize()) + #define jit_link_prolog() \ do { \ _jitc->tail->link = _jitc->function->prolog->link; \ @@ -248,8 +300,8 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*, #define jit_class_xpr 0x80000000 /* float / vector */ /* Used on sparc64 where %f0-%f31 can be encode for single 
float * but %f32 to %f62 only as double precision */ -#define jit_class_sng 0x10000000 /* Single precision float */ -#define jit_class_dbl 0x20000000 /* Only double precision float */ +#define jit_class_sng 0x00010000 /* Single precision float */ +#define jit_class_dbl 0x00020000 /* Only double precision float */ #define jit_regno_patch 0x00008000 /* this is a register * returned by a "user" call * to jit_get_reg() */ @@ -474,9 +526,14 @@ struct jit_function { } call; jit_node_t *prolog; jit_node_t *epilog; + jit_node_t *alist; jit_int32_t *regoff; jit_regset_t regset; jit_int32_t stack; +#if defined(__i386__) || defined(__x86_64__) + jit_int32_t cvt_offset; /* allocai'd offset for x87<->xmm or + * fpr<->gpr transfer using the stack */ +#endif /* Helper for common jit generation pattern, used in GNU Smalltalk * and possibly others, where a static frame layout is required or @@ -485,11 +542,25 @@ struct jit_function { jit_uint32_t define_frame : 1; jit_uint32_t assume_frame : 1; + jit_uint32_t need_frame : 1; /* need frame pointer? */ + jit_uint32_t need_stack : 1; /* need stack pointer? */ + jit_uint32_t need_return : 1; /* not a leaf function */ + /* alloca offset offset */ jit_int32_t aoffoff; /* uses allocar flag */ jit_uint32_t allocar : 1; +#if __arm__ + /* If will, or might use float registers and vfp is not available. + * Use the first 64 bytes always, as the access to the virtual float + * registers use hardcoded instructions that can only reach 64 byte + * displacements, and to keep code simpler, do not use temporaries. 
*/ + jit_uint32_t swf_offset : 1; + /* If need to call C functions for some operation, or variadic function */ + jit_uint32_t save_reg_args : 1; +#endif + /* varargs state offsets */ jit_int32_t vaoff; /* offset of jit_va_list */ jit_int32_t vagp; /* first gp va argument */ @@ -509,6 +580,13 @@ struct jit_compiler { jit_int32_t rout; /* first output register */ jit_int32_t breg; /* base register for prolog/epilog */ #endif +#if __mips__ + struct { + jit_int32_t op; /* pending instruction, candidate + * to be inserted in a delay slot */ + jit_bool_t pend; /* non zero if need to emit op */ + } inst; +#endif #if __mips__ || __ia64__ || __alpha__ || \ (__sparc__ && __WORDSIZE == 64) || __riscv || __loongarch__ jit_int32_t carry; @@ -528,11 +606,14 @@ struct jit_compiler { #endif jit_uint32_t no_data : 1; jit_uint32_t no_note : 1; + jit_int32_t framesize; /* space for callee save registers, + * frame pointer and return address */ jit_int32_t reglen; /* number of registers */ jit_regset_t regarg; /* cannot allocate */ jit_regset_t regsav; /* automatic spill only once */ jit_regset_t reglive; /* known live registers at some point */ jit_regset_t regmask; /* register mask to update reglive */ + jit_regset_t explive; /* explicitly marked as live */ struct { jit_uint8_t *end; } code; @@ -657,6 +738,8 @@ struct jit_state { struct { jit_uint8_t *ptr; jit_word_t length; + /* PROTECTED bytes starting at PTR are mprotect'd. */ + jit_word_t protected; } code; struct { jit_uint8_t *ptr; diff --git a/deps/lightning/include/lightning/jit_riscv.h b/deps/lightning/include/lightning/jit_riscv.h index ad3f76fa..bf59c5b9 100644 --- a/deps/lightning/include/lightning/jit_riscv.h +++ b/deps/lightning/include/lightning/jit_riscv.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Free Software Foundation, Inc. + * Copyright (C) 2019-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/include/lightning/jit_s390.h b/deps/lightning/include/lightning/jit_s390.h index a28b0dd3..d51cfeca 100644 --- a/deps/lightning/include/lightning/jit_s390.h +++ b/deps/lightning/include/lightning/jit_s390.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -65,4 +65,13 @@ typedef enum { #define JIT_NOREG _NOREG } jit_reg_t; +typedef struct { + jit_uint32_t flogr : 1; +} jit_cpu_t; + +/* + * Initialization + */ +extern jit_cpu_t jit_cpu; + #endif /* _jit_s390_h */ diff --git a/deps/lightning/include/lightning/jit_sparc.h b/deps/lightning/include/lightning/jit_sparc.h index e5988e11..ec21be9c 100644 --- a/deps/lightning/include/lightning/jit_sparc.h +++ b/deps/lightning/include/lightning/jit_sparc.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -99,4 +99,13 @@ typedef enum { _NOREG, } jit_reg_t; +typedef struct { + jit_uint32_t lzcnt : 1; +} jit_cpu_t; + +/* + * Initialization + */ +extern jit_cpu_t jit_cpu; + #endif /* _jit_sparc_h */ diff --git a/deps/lightning/include/lightning/jit_x86.h b/deps/lightning/include/lightning/jit_x86.h index 91f91244..4c480131 100644 --- a/deps/lightning/include/lightning/jit_x86.h +++ b/deps/lightning/include/lightning/jit_x86.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -189,6 +189,10 @@ typedef struct { jit_uint32_t avx : 1; /* lahf/sahf available in 64 bits mode */ jit_uint32_t lahf : 1; + /* lzcnt and tzcnt? */ + jit_uint32_t abm : 1; + /* adcx and adox instructions available? 
*/ + jit_uint32_t adx : 1; } jit_cpu_t; /* diff --git a/deps/lightning/lib/Makefile.am b/deps/lightning/lib/Makefile.am index a30e7fda..44ac4f2e 100644 --- a/deps/lightning/lib/Makefile.am +++ b/deps/lightning/lib/Makefile.am @@ -17,26 +17,31 @@ AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \ -D_GNU_SOURCE $(LIGHTNING_CFLAGS) liblightning_LTLIBRARIES = liblightning.la -liblightning_la_LDFLAGS = -version-info 1:0:0 +liblightning_la_LDFLAGS = -version-info 2:0:0 +AM_CPPFLAGS = if get_jit_size JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c" -AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)' +AM_CPPFLAGS += -DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)' +endif +if strong_type_checking +AM_CPPFLAGS += -DSTRONG_TYPE_CHECKING=1 endif liblightningdir = $(libdir) liblightning_la_SOURCES = \ jit_disasm.c \ jit_memory.c \ - jit_names.c \ jit_note.c \ jit_print.c \ jit_size.c \ lightning.c EXTRA_DIST = \ + jit_names.c \ jit_fallback.c \ jit_rewind.c \ + aarch64-logical-immediates.c \ jit_aarch64.c \ jit_aarch64-cpu.c \ jit_aarch64-fpu.c \ diff --git a/deps/lightning/lib/aarch64-logical-immediates.c b/deps/lightning/lib/aarch64-logical-immediates.c new file mode 100644 index 00000000..c1e1ab07 --- /dev/null +++ b/deps/lightning/lib/aarch64-logical-immediates.c @@ -0,0 +1,161 @@ +// AArch64 Logical Immediate Encoding and Decoding +// +// I hereby place this code in the public domain, as per the terms of the +// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/ + +#include <stdbool.h> +#include <stdint.h> + +static inline int nonzeroCountTrailingZeros64(uint64_t n) { + return __builtin_ctzll(n); +} + +static inline int countTrailingZeros64(uint64_t n) { + return n ?
nonzeroCountTrailingZeros64(n) : 64; +} + +static inline int nonzeroCountLeadingZeros64(uint64_t n) { + return __builtin_clzll(n); +} + +static inline int nonzeroCountLeadingZeros32(uint32_t n) { + return __builtin_clz(n); +} + +static inline uint64_t rotateRight64(uint64_t v, int n) { + // return __builtin_rotateright64(v, n); + return (v >> (n & 63)) | (v << (-n & 63)); +} + +static inline uint64_t clearTrailingOnes64(uint64_t n) { + return n & (n + 1); +} + +#define ENCODE_FAILED (-1) + +int encodeLogicalImmediate64(uint64_t val) { + // Consider an ARM64 logical immediate as a pattern of "o" ones preceded + // by "z" more-significant zeroes, repeated to fill a 64-bit integer. + // o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This + // part of the pattern is encoded in the fields "imms" and "N". + // + // "immr" encodes a further right rotate of the repeated pattern, allowing + // a wide range of useful bitwise constants to be represented. + // + // (The spec describes the "immr" rotate as rotating the "o + z" bit + // pattern before repeating it to fill 64-bits, but, as it's a repeating + // pattern, rotating afterwards is equivalent.) + + // This encoding is not allowed to represent all-zero or all-one values. + if (val == 0 || ~val == 0) + return ENCODE_FAILED; + + // To detect an immediate that may be encoded in this scheme, we first + // remove the right-rotate, by rotating such that the least significant + // bit is a one and the most significant bit is a zero. + // + // We do this by clearing any trailing one bits, then counting the + // trailing zeroes. This finds an "edge", where zero goes to one. + // We then rotate the original value right by that amount, moving + // the first one to the least significant bit. 
+ + int rotation = countTrailingZeros64(clearTrailingOnes64(val)); + uint64_t normalized = rotateRight64(val, rotation & 63); + + // Now we have normalized the value, and determined the rotation, we can + // determine "z" by counting the leading zeroes, and "o" by counting the + // trailing ones. (These will both be positive, as we already rejected 0 + // and ~0, and rotated the value to start with a zero and end with a one.) + + int zeroes = nonzeroCountLeadingZeros64(normalized); + int ones = nonzeroCountTrailingZeros64(~normalized); + int size = zeroes + ones; + + // Detect the repeating pattern (by comparing every repetition to the + // one next to it, using rotate). + + if (rotateRight64(val, size & 63) != val) + return ENCODE_FAILED; + + // We do not need to further validate size to ensure it is a power of two + // between 2 and 64. The only "minimal" patterns that can repeat to fill a + // 64-bit value must have a length that is a factor of 64 (i.e. it is a + // power of two in the range [1,64]). And our pattern cannot be of length + // one (as we already rejected 0 and ~0). + // + // By "minimal" patterns I refer to patterns which do not themselves + // contain repetitions. For example, '010101' is a non-minimal pattern of + // a non-power-of-two length that can pass the above rotational test. It + // consists of the minimal pattern '01'. All our patterns are minimal, as + // they contain only one contiguous run of ones separated by at least one + // zero. + + // Finally, we encode the values. "rotation" is the amount we rotated + // right by to "undo" the right-rotate encoded in immr, so must be + // negated. 
+ + // size 2: N=0 immr=00000r imms=11110s + // size 4: N=0 immr=0000rr imms=1110ss + // size 8: N=0 immr=000rrr imms=110sss + // size 16: N=0 immr=00rrrr imms=10ssss + // size 32: N=0 immr=0rrrrr imms=0sssss + // size 64: N=1 immr=rrrrrr imms=ssssss + int immr = -rotation & (size - 1); + int imms = -(size << 1) | (ones - 1); + int N = (size >> 6); + + return (N << 12) | (immr << 6) | (imms & 0x3f); +} + +int encodeLogicalImmediate32(uint32_t val) { + return encodeLogicalImmediate64(((uint64_t)val << 32) | val); +} + +// Decoding! + +bool isValidLogicalImmediate64(unsigned val) { + unsigned N = (val >> 12) & 1; + unsigned imms = val & 0x3f; + unsigned pattern = (N << 6) | (~imms & 0x3f); + return (pattern & (pattern - 1)) != 0; +} + +bool isValidLogicalImmediate32(unsigned val) { + unsigned N = (val >> 12) & 1; + return N == 0 && isValidLogicalImmediate64(val); +} + +#define DECODE_FAILED 0 + +// returns DECODE_FAILED (zero) if the encoding is invalid +uint64_t decodeLogicalImmediate64(unsigned val) { + // Fun way to generate the immediates with mask ^ (mask << S) + static const uint64_t mask_lookup[] = { + 0xffffffffffffffff, // size = 64 + 0x00000000ffffffff, // size = 32 + 0x0000ffff0000ffff, // size = 16 + 0x00ff00ff00ff00ff, // size = 8 + 0x0f0f0f0f0f0f0f0f, // size = 4 + 0x3333333333333333, // size = 2 + }; + + unsigned N = (val >> 12) & 1; + int immr = (val >> 6) & 0x3f; + unsigned imms = val & 0x3f; + + unsigned pattern = (N << 6) | (~imms & 0x3f); + + if (!(pattern & (pattern - 1))) return DECODE_FAILED; + + int leading_zeroes = nonzeroCountLeadingZeros32(pattern); + unsigned imms_mask = 0x7fffffff >> leading_zeroes; + uint64_t mask = mask_lookup[leading_zeroes - 25]; + unsigned S = (imms + 1) & imms_mask; + return rotateRight64(mask ^ (mask << S), immr); +} + +uint32_t decodeLogicalImmediate32(unsigned val) { + unsigned N = (val >> 12) & 1; + if (N) return DECODE_FAILED; + return (uint32_t)decodeLogicalImmediate64(val); +} diff --git 
a/deps/lightning/lib/jit_aarch64-cpu.c b/deps/lightning/lib/jit_aarch64-cpu.c index 35ddabfd..d5e64ad3 100644 --- a/deps/lightning/lib/jit_aarch64-cpu.c +++ b/deps/lightning/lib/jit_aarch64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -210,7 +210,7 @@ typedef union { jit_int32_t w; # undef ui } instr_t; -# define stack_framesize 160 +# define s26_p(d) ((d) >= -33554432 && (d) <= 33554431) # define ii(i) *_jit->pc.ui++ = i # define ldr(r0,r1) ldr_l(r0,r1) # define ldxr(r0,r1,r2) ldxr_l(r0,r1,r2) @@ -349,6 +349,9 @@ typedef union { # define A64_ORR 0x2a000000 # define A64_MOV 0x2a0003e0 /* AKA orr Rd,xzr,Rm */ # define A64_MVN 0x2a2003e0 +# define A64_CLS 0x5ac01400 +# define A64_CLZ 0x5ac01000 +# define A64_RBIT 0x5ac00000 # define A64_UXTW 0x2a0003e0 /* AKA MOV */ # define A64_EOR 0x4a000000 # define A64_ANDS 0x6a000000 @@ -370,6 +373,9 @@ typedef union { # define MOV(Rd,Rm) ox_x(A64_MOV|XS,Rd,Rm) # define MVN(Rd,Rm) ox_x(A64_MVN|XS,Rd,Rm) # define NEG(Rd,Rm) ox_x(A64_NEG|XS,Rd,Rm) +# define CLS(Rd,Rm) o_xx(A64_CLS|XS,Rd,Rm) +# define CLZ(Rd,Rm) o_xx(A64_CLZ|XS,Rd,Rm) +# define RBIT(Rd,Rm) o_xx(A64_RBIT|XS,Rd,Rm) # define MOVN(Rd,Imm16) ox_h(A64_MOVN|XS,Rd,Imm16) # define MOVN_16(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16) # define MOVN_32(Rd,Imm16) ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16) @@ -584,6 +590,14 @@ static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) MVN(r0,r1) +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) CLZ(r0,r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define 
ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define andr(r0,r1,r2) AND(r0,r1,r2) # define andi(r0,r1,i0) _andi(_jit,r0,r1,i0) static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -781,12 +795,12 @@ _bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); # define bmci(i0,r0,i1) bmxi(BCC_EQ,i0,r0,i1) # define jmpr(r0) BR(r0) # define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) BLR(r0) # define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(i0) _prolog(_jit,i0) @@ -802,36 +816,17 @@ static void _patch_at(jit_state_t*,jit_word_t,jit_word_t); #endif #if CODE +/* https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ */ +#include "aarch64-logical-immediates.c" static jit_int32_t logical_immediate(jit_word_t imm) { - /* There are 5334 possible immediate values, but to avoid the - * need of either too complex code or large lookup tables, - * only check for (simply) encodable common/small values */ - switch (imm) { - case -16: return (0xf3b); - case -15: return (0xf3c); - case -13: return (0xf3d); - case -9: return (0xf3e); - case -8: return (0xf7c); - case -7: return (0xf7d); - case -5: return (0xf7e); - case -4: return (0xfbd); - case -3: return (0xfbe); - case -2: return (0xffe); - case 1: return (0x000); - case 2: return (0xfc0); - case 3: return (0x001); - case 4: return (0xf80); - case 6: return (0xfc1); - case 7: return (0x002); - case 8: return (0xf40); - case 12: return (0xf81); - case 14: return (0xfc2); - case 15: return (0x003); - case 16: return (0xf00); - 
default: return (-1); + jit_int32_t result = encodeLogicalImmediate64(imm); + if (result != ENCODE_FAILED) { + assert(isValidLogicalImmediate64(result)); + return (result & 0xfff); } + return (-1); } static void @@ -912,7 +907,7 @@ static void _o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26) { instr_t i; - assert(Simm26 >= -33554432 && Simm26 <= 33554431); + assert(s26_p(Simm26)); assert(!(Op & ~0xfc000000)); i.w = Op; i.imm26.b = Simm26; @@ -1398,6 +1393,27 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) CSEL(r0, r0, r1, CC_EQ); } +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + clzr(r0, r0); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + RBIT(r0, r1); + clor(r0, r0); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + RBIT(r0, r1); + clzr(r0, r0); +} + static void _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1850,7 +1866,7 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, retry = _jit->pc.w; LDAXR(r0, r1); eqr(r0, r0, r2); - jump0 = beqi(_jit->pc.w r0, 0); /* beqi done r0 0 */ + jump0 = beqi(_jit->pc.w, r0, 0); /* beqi done r0 0 */ STLXR(r3, r0, r1); jump1 = bnei(_jit->pc.w, r0, 0); /* bnei retry r0 0 */ /* done: */ @@ -2166,20 +2182,22 @@ _bmxi(jit_state_t *_jit, jit_int32_t cc, return (w); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (w >= -33554432 && w <= 33554431) - B(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s26_p(d)) + B(d); else { reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i0); jmpr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2194,20 +2212,22 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0) return (w); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; 
jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (w >= -33554432 && w <= 33554431) - BL(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s26_p(d)) + BL(d); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); callr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2222,20 +2242,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) return (w); } -/* - * prolog and epilog not as "optimized" as one would like, but the - * problem of overallocating stack space to save callee save registers - * exists on all ports, and is still a todo to use a variable - * stack_framesize - * value, what would cause needing to patch some calls, most likely - * the offset of jit_arg* of stack arguments. - */ static void _prolog(jit_state_t *_jit, jit_node_t *node) { - jit_int32_t reg; + jit_int32_t reg, rreg, offs; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); if (_jitc->function->assume_frame) return; @@ -2246,40 +2259,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node) _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; - STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3)); - MOV_XSP(FP_REGNO, SP_REGNO); -#define SPILL(L, R, O) \ - do { \ - if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) { \ - if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ - STPI(L, R, SP_REGNO, O); \ - else \ - STRI(L, SP_REGNO, O); \ - } \ - else if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ - STRI(R, SP_REGNO, O + 1); \ - } while (0) - SPILL(19, 20, 2); - SPILL(21, 22, 4); - SPILL(23, 24, 6); - SPILL(25, 26, 8); - SPILL(27, 28, 10); -#undef SPILL -#define SPILL(R, O) \ - do { \ - if (jit_regset_tstbit(&_jitc->function->regset, _V##R)) \ - stxi_d(O, SP_REGNO, R); \ - } while (0) - SPILL( 8, 96); - SPILL( 9, 104); - SPILL(10, 
112); - SPILL(11, 120); - SPILL(12, 128); - SPILL(13, 136); - SPILL(14, 144); - SPILL(15, 152); -#undef SPILL - if (_jitc->function->stack) + + if (!_jitc->function->need_frame) { + /* check if any callee save register needs to be saved */ + for (reg = 0; reg < _jitc->reglen; ++reg) + if (jit_regset_tstbit(&_jitc->function->regset, reg) && + (_rvs[reg].spec & jit_class_sav)) { + jit_check_frame(); + break; + } + } + + if (_jitc->function->need_frame) { + STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(jit_framesize() >> 3)); + MOV_XSP(FP_REGNO, SP_REGNO); + } + /* callee save registers */ + for (reg = 0, offs = 2; reg < jit_size(iregs);) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg])) + break; + } + if (rreg < jit_size(iregs)) { + STPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs); + offs += 2; + reg = rreg + 1; + } + else { + STRI(rn(iregs[reg]), SP_REGNO, offs); + ++offs; + /* No pair found */ + break; + } + } + else + ++reg; + } + for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + stxi_d(offs, SP_REGNO, rn(fregs[reg])); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->stack) subi(SP_REGNO, SP_REGNO, _jitc->function->stack); if (_jitc->function->allocar) { reg = jit_get_reg(jit_class_gpr); @@ -2288,6 +2312,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) jit_unget_reg(reg); } +#if !__APPLE__ if (_jitc->function->self.call & jit_call_varargs) { /* Save gp registers in the save area, if any is a vararg */ for (reg = 8 - _jitc->function->vagp / -8; @@ -2305,53 +2330,55 @@ _prolog(jit_state_t *_jit, jit_node_t *node) stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) + reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg)); } +#endif } static void _epilog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg, rreg, offs; if 
(_jitc->function->assume_frame) return; if (_jitc->function->stack) MOV_XSP(SP_REGNO, FP_REGNO); -#define LOAD(L, R, O) \ - do { \ - if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) { \ - if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ - LDPI(L, R, SP_REGNO, O); \ - else \ - LDRI(L, SP_REGNO, O); \ - } \ - else if (jit_regset_tstbit(&_jitc->function->regset, _R##R)) \ - LDRI(R, SP_REGNO, O + 1); \ - } while (0) - LOAD(19, 20, 2); - LOAD(21, 22, 4); - LOAD(23, 24, 6); - LOAD(25, 26, 8); - LOAD(27, 28, 10); -#undef LOAD -#define LOAD(R, O) \ - do { \ - if (jit_regset_tstbit(&_jitc->function->regset, _V##R)) \ - ldxi_d(R, SP_REGNO, O); \ - } while (0) - LOAD( 8, 96); - LOAD( 9, 104); - LOAD(10, 112); - LOAD(11, 120); - LOAD(12, 128); - LOAD(13, 136); - LOAD(14, 144); - LOAD(15, 152); -#undef LOAD - LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3); + /* callee save registers */ + for (reg = 0, offs = 2; reg < jit_size(iregs);) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg])) + break; + } + if (rreg < jit_size(iregs)) { + LDPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs); + offs += 2; + reg = rreg + 1; + } + else { + LDRI(rn(iregs[reg]), SP_REGNO, offs); + ++offs; + /* No pair found */ + break; + } + } + else + ++reg; + } + for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + ldxi_d(rn(fregs[reg]), SP_REGNO, offs); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->need_frame) + LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, jit_framesize() >> 3); RET(); } static void _vastart(jit_state_t *_jit, jit_int32_t r0) { +#if !__APPLE__ jit_int32_t reg; assert(_jitc->function->self.call & jit_call_varargs); @@ -2362,7 +2389,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) reg = jit_get_reg(jit_class_gpr); /* Initialize stack 
pointer to the first stack argument. */ - addi(rn(reg), FP_REGNO, _jitc->function->self.size); + addi(rn(reg), FP_REGNO, jit_selfsize()); stxi(offsetof(jit_va_list_t, stack), r0, rn(reg)); /* Initialize gp top pointer to the first stack argument. */ @@ -2382,11 +2409,16 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg)); jit_unget_reg(reg); +#else + assert(_jitc->function->self.call & jit_call_varargs); + addi(r0, FP_REGNO, jit_selfsize()); +#endif } static void _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if !__APPLE__ jit_word_t ge_code; jit_word_t lt_code; jit_int32_t rg0, rg1; @@ -2416,7 +2448,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(rg1); /* Jump over overflow code. */ - lt_code = jmpi_p(_jit->pc.w); + lt_code = jmpi(_jit->pc.w); /* Where to land if argument is in overflow area. */ patch_at(ge_code, _jit->pc.w); @@ -2435,6 +2467,11 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); +#else + assert(_jitc->function->self.call & jit_call_varargs); + ldr(r0, r1); + addi(r1, r1, sizeof(jit_word_t)); +#endif } static void @@ -2454,7 +2491,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) ffc = i.w & 0xffc00000; if (fc == A64_B || fc == A64_BL) { d = (label - instr) >> 2; - assert(d >= -33554432 && d <= 33554431); + assert(s26_p(d)); i.imm26.b = d; u.i[0] = i.w; } diff --git a/deps/lightning/lib/jit_aarch64-fpu.c b/deps/lightning/lib/jit_aarch64-fpu.c index 7c405393..3d17e324 100644 --- a/deps/lightning/lib/jit_aarch64-fpu.c +++ b/deps/lightning/lib/jit_aarch64-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -862,6 +862,7 @@ dbopi(ltgt) static void _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { +#if !__APPLE__ jit_word_t ge_code; jit_word_t lt_code; jit_int32_t rg0, rg1; @@ -891,7 +892,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(rg1); /* Jump over overflow code. */ - lt_code = jmpi_p(_jit->pc.w); + lt_code = jmpi(_jit->pc.w); /* Where to land if argument is in overflow area. */ patch_at(ge_code, _jit->pc.w); @@ -910,5 +911,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); +#else + assert(_jitc->function->self.call & jit_call_varargs); + ldr_d(r0, r1); + addi(r1, r1, sizeof(jit_float64_t)); +#endif } #endif diff --git a/deps/lightning/lib/jit_aarch64-sz.c b/deps/lightning/lib/jit_aarch64-sz.c index b1f451f2..3d1ea99a 100644 --- a/deps/lightning/lib/jit_aarch64-sz.c +++ b/deps/lightning/lib/jit_aarch64-sz.c @@ -1,20 +1,25 @@ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 120 +# if PACKED_STACK +#define JIT_INSTR_MAX 96 0, /* data */ 0, /* live */ - 4, /* align */ + 12, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 120, /* prolog */ + 96, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,11 +27,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 44, /* va_start */ - 64, /* va_arg */ - 72, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 12, /* va_arg_d */ 0, /* va_end */ 4, /* 
addr */ 20, /* addi */ @@ -97,29 +114,34 @@ 16, /* movi */ 8, /* movnr */ 8, /* movzr */ + 28, /* casr */ + 36, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ 8, /* htonr_us */ 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 16, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 16, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 16, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 16, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 16, /* ldi_i */ 4, /* ldr_ui */ - 12, /* ldi_ui */ + 16, /* ldi_ui */ 4, /* ldr_l */ - 12, /* ldi_l */ + 16, /* ldi_l */ 8, /* ldxr_c */ 20, /* ldxi_c */ 4, /* ldxr_uc */ @@ -135,13 +157,13 @@ 4, /* ldxr_l */ 20, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 16, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 16, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 16, /* sti_i */ 4, /* str_l */ - 12, /* sti_l */ + 16, /* sti_l */ 4, /* stxr_c */ 20, /* stxi_c */ 4, /* stxr_s */ @@ -191,17 +213,41 @@ 8, /* bxsubr_u */ 8, /* bxsubi_u */ 4, /* jmpr */ - 20, /* jmpi */ + 4, /* jmpi */ 4, /* callr */ - 20, /* calli */ + 16, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -261,11 
+307,11 @@ 4, /* movr_f */ 8, /* movi_f */ 8, /* ldr_f */ - 16, /* ldi_f */ + 20, /* ldi_f */ 8, /* ldxr_f */ 24, /* ldxi_f */ 8, /* str_f */ - 16, /* sti_f */ + 20, /* sti_f */ 8, /* stxr_f */ 24, /* stxi_f */ 8, /* bltr_f */ @@ -352,11 +398,11 @@ 4, /* movr_d */ 12, /* movi_d */ 8, /* ldr_d */ - 16, /* ldi_d */ + 20, /* ldi_d */ 8, /* ldxr_d */ 24, /* ldxi_d */ 8, /* str_d */ - 16, /* sti_d */ + 20, /* sti_d */ 8, /* stxr_d */ 24, /* stxi_d */ 8, /* bltr_d */ @@ -401,9 +447,461 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ + 8, /* clo */ + 4, /* clz */ + 12, /* cto */ + 8, /* ctz */ + +# else /* PACKED_STACK */ +#define JIT_INSTR_MAX 120 + 0, /* data */ + 0, /* live */ + 12, /* align */ + 0, /* save */ + 0, /* load */ + 4, /* skip */ + 0, /* #name */ + 0, /* #note */ + 0, /* label */ + 120, /* prolog */ + 0, /* ellipsis */ + 0, /* va_push */ + 0, /* allocai */ + 0, /* allocar */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ + 0, /* getarg_c */ + 0, /* getarg_uc */ + 0, /* getarg_s */ + 0, /* getarg_us */ + 0, /* getarg_i */ + 0, /* getarg_ui */ + 0, /* getarg_l */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 12, /* va_arg_d */ + 0, /* va_end */ + 4, /* addr */ + 20, /* addi */ + 4, /* addcr */ + 12, /* addci */ + 4, /* addxr */ + 8, /* addxi */ + 4, /* subr */ + 20, /* subi */ + 4, /* subcr */ + 12, /* subci */ + 4, /* subxr */ + 8, /* subxi */ + 24, /* rsbi */ + 4, /* mulr */ + 20, /* muli */ + 12, /* qmulr */ + 20, /* qmuli */ + 12, /* qmulr_u */ + 20, /* qmuli_u */ + 4, /* divr */ + 20, /* divi */ + 4, /* divr_u */ + 12, /* divi_u */ + 20, /* qdivr */ + 16, /* qdivi */ + 20, /* qdivr_u */ + 16, /* qdivi_u */ + 12, /* remr */ + 28, /* 
remi */ + 12, /* remr_u */ + 20, /* remi_u */ + 4, /* andr */ + 20, /* andi */ + 4, /* orr */ + 20, /* ori */ + 4, /* xorr */ + 20, /* xori */ + 4, /* lshr */ + 4, /* lshi */ + 4, /* rshr */ + 4, /* rshi */ + 4, /* rshr_u */ + 4, /* rshi_u */ + 4, /* negr */ + 4, /* comr */ + 8, /* ltr */ + 8, /* lti */ + 8, /* ltr_u */ + 8, /* lti_u */ + 8, /* ler */ + 8, /* lei */ + 8, /* ler_u */ + 8, /* lei_u */ + 8, /* eqr */ + 8, /* eqi */ + 8, /* ger */ + 8, /* gei */ + 8, /* ger_u */ + 8, /* gei_u */ + 8, /* gtr */ + 8, /* gti */ + 8, /* gtr_u */ + 8, /* gti_u */ + 8, /* ner */ + 8, /* nei */ + 4, /* movr */ + 16, /* movi */ + 8, /* movnr */ + 8, /* movzr */ + 28, /* casr */ + 36, /* casi */ + 4, /* extr_c */ + 4, /* extr_uc */ + 4, /* extr_s */ + 4, /* extr_us */ + 4, /* extr_i */ + 4, /* extr_ui */ 8, /* bswapr_us */ 8, /* bswapr_ui */ 4, /* bswapr_ul */ - 28, /* casr */ - 36, /* casi */ + 8, /* htonr_us */ + 8, /* htonr_ui */ + 4, /* htonr_ul */ + 4, /* ldr_c */ + 16, /* ldi_c */ + 4, /* ldr_uc */ + 16, /* ldi_uc */ + 4, /* ldr_s */ + 16, /* ldi_s */ + 4, /* ldr_us */ + 16, /* ldi_us */ + 4, /* ldr_i */ + 16, /* ldi_i */ + 4, /* ldr_ui */ + 16, /* ldi_ui */ + 4, /* ldr_l */ + 16, /* ldi_l */ + 8, /* ldxr_c */ + 20, /* ldxi_c */ + 4, /* ldxr_uc */ + 20, /* ldxi_uc */ + 4, /* ldxr_s */ + 16, /* ldxi_s */ + 4, /* ldxr_us */ + 16, /* ldxi_us */ + 4, /* ldxr_i */ + 20, /* ldxi_i */ + 4, /* ldxr_ui */ + 16, /* ldxi_ui */ + 4, /* ldxr_l */ + 20, /* ldxi_l */ + 4, /* str_c */ + 16, /* sti_c */ + 4, /* str_s */ + 16, /* sti_s */ + 4, /* str_i */ + 16, /* sti_i */ + 4, /* str_l */ + 16, /* sti_l */ + 4, /* stxr_c */ + 20, /* stxi_c */ + 4, /* stxr_s */ + 20, /* stxi_s */ + 4, /* stxr_i */ + 20, /* stxi_i */ + 4, /* stxr_l */ + 20, /* stxi_l */ + 8, /* bltr */ + 8, /* blti */ + 8, /* bltr_u */ + 8, /* blti_u */ + 8, /* bler */ + 8, /* blei */ + 8, /* bler_u */ + 8, /* blei_u */ + 8, /* beqr */ + 24, /* beqi */ + 8, /* bger */ + 8, /* bgei */ + 8, /* bger_u */ + 8, /* bgei_u */ + 8, 
/* bgtr */ + 8, /* bgti */ + 8, /* bgtr_u */ + 8, /* bgti_u */ + 8, /* bner */ + 24, /* bnei */ + 8, /* bmsr */ + 8, /* bmsi */ + 8, /* bmcr */ + 8, /* bmci */ + 8, /* boaddr */ + 8, /* boaddi */ + 8, /* boaddr_u */ + 8, /* boaddi_u */ + 8, /* bxaddr */ + 8, /* bxaddi */ + 8, /* bxaddr_u */ + 8, /* bxaddi_u */ + 8, /* bosubr */ + 8, /* bosubi */ + 8, /* bosubr_u */ + 8, /* bosubi_u */ + 8, /* bxsubr */ + 8, /* bxsubi */ + 8, /* bxsubr_u */ + 8, /* bxsubi_u */ + 4, /* jmpr */ + 4, /* jmpi */ + 4, /* callr */ + 16, /* calli */ + 0, /* prepare */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ + 0, /* finishr */ + 0, /* finishi */ + 0, /* ret */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ + 0, /* retval_c */ + 0, /* retval_uc */ + 0, /* retval_s */ + 0, /* retval_us */ + 0, /* retval_i */ + 0, /* retval_ui */ + 0, /* retval_l */ + 96, /* epilog */ + 0, /* arg_f */ + 0, /* getarg_f */ + 0, /* putargr_f */ + 0, /* putargi_f */ + 4, /* addr_f */ + 12, /* addi_f */ + 4, /* subr_f */ + 12, /* subi_f */ + 12, /* rsbi_f */ + 4, /* mulr_f */ + 12, /* muli_f */ + 4, /* divr_f */ + 12, /* divi_f */ + 4, /* negr_f */ + 4, /* absr_f */ + 4, /* sqrtr_f */ + 8, /* ltr_f */ + 16, /* lti_f */ + 8, /* ler_f */ + 16, /* lei_f */ + 8, /* eqr_f */ + 16, /* eqi_f */ + 8, /* ger_f */ + 16, /* gei_f */ + 8, /* gtr_f */ + 16, /* gti_f */ + 8, /* ner_f */ + 16, /* nei_f */ + 8, /* unltr_f */ + 16, /* unlti_f */ + 8, /* unler_f */ + 16, /* unlei_f */ + 16, /* uneqr_f */ + 24, /* uneqi_f */ + 8, /* unger_f */ + 16, /* 
ungei_f */ + 8, /* ungtr_f */ + 16, /* ungti_f */ + 16, /* ltgtr_f */ + 24, /* ltgti_f */ + 8, /* ordr_f */ + 16, /* ordi_f */ + 8, /* unordr_f */ + 16, /* unordi_f */ + 8, /* truncr_f_i */ + 4, /* truncr_f_l */ + 4, /* extr_f */ + 4, /* extr_d_f */ + 4, /* movr_f */ + 8, /* movi_f */ + 8, /* ldr_f */ + 20, /* ldi_f */ + 8, /* ldxr_f */ + 24, /* ldxi_f */ + 8, /* str_f */ + 20, /* sti_f */ + 8, /* stxr_f */ + 24, /* stxi_f */ + 8, /* bltr_f */ + 16, /* blti_f */ + 8, /* bler_f */ + 16, /* blei_f */ + 8, /* beqr_f */ + 16, /* beqi_f */ + 8, /* bger_f */ + 16, /* bgei_f */ + 8, /* bgtr_f */ + 16, /* bgti_f */ + 8, /* bner_f */ + 16, /* bnei_f */ + 8, /* bunltr_f */ + 16, /* bunlti_f */ + 8, /* bunler_f */ + 16, /* bunlei_f */ + 16, /* buneqr_f */ + 24, /* buneqi_f */ + 8, /* bunger_f */ + 16, /* bungei_f */ + 8, /* bungtr_f */ + 16, /* bungti_f */ + 16, /* bltgtr_f */ + 24, /* bltgti_f */ + 8, /* bordr_f */ + 16, /* bordi_f */ + 8, /* bunordr_f */ + 16, /* bunordi_f */ + 0, /* pushargr_f */ + 0, /* pushargi_f */ + 0, /* retr_f */ + 0, /* reti_f */ + 0, /* retval_f */ + 0, /* arg_d */ + 0, /* getarg_d */ + 0, /* putargr_d */ + 0, /* putargi_d */ + 4, /* addr_d */ + 12, /* addi_d */ + 4, /* subr_d */ + 12, /* subi_d */ + 12, /* rsbi_d */ + 4, /* mulr_d */ + 12, /* muli_d */ + 4, /* divr_d */ + 12, /* divi_d */ + 4, /* negr_d */ + 4, /* absr_d */ + 4, /* sqrtr_d */ + 8, /* ltr_d */ + 16, /* lti_d */ + 8, /* ler_d */ + 16, /* lei_d */ + 8, /* eqr_d */ + 16, /* eqi_d */ + 8, /* ger_d */ + 16, /* gei_d */ + 8, /* gtr_d */ + 16, /* gti_d */ + 8, /* ner_d */ + 16, /* nei_d */ + 8, /* unltr_d */ + 16, /* unlti_d */ + 8, /* unler_d */ + 16, /* unlei_d */ + 16, /* uneqr_d */ + 24, /* uneqi_d */ + 8, /* unger_d */ + 16, /* ungei_d */ + 8, /* ungtr_d */ + 16, /* ungti_d */ + 16, /* ltgtr_d */ + 24, /* ltgti_d */ + 8, /* ordr_d */ + 16, /* ordi_d */ + 8, /* unordr_d */ + 16, /* unordi_d */ + 8, /* truncr_d_i */ + 4, /* truncr_d_l */ + 4, /* extr_d */ + 4, /* extr_f_d */ + 4, /* 
movr_d */ + 12, /* movi_d */ + 8, /* ldr_d */ + 20, /* ldi_d */ + 8, /* ldxr_d */ + 24, /* ldxi_d */ + 8, /* str_d */ + 20, /* sti_d */ + 8, /* stxr_d */ + 24, /* stxi_d */ + 8, /* bltr_d */ + 16, /* blti_d */ + 8, /* bler_d */ + 16, /* blei_d */ + 8, /* beqr_d */ + 20, /* beqi_d */ + 8, /* bger_d */ + 16, /* bgei_d */ + 8, /* bgtr_d */ + 16, /* bgti_d */ + 8, /* bner_d */ + 16, /* bnei_d */ + 8, /* bunltr_d */ + 16, /* bunlti_d */ + 8, /* bunler_d */ + 16, /* bunlei_d */ + 16, /* buneqr_d */ + 24, /* buneqi_d */ + 8, /* bunger_d */ + 16, /* bungei_d */ + 8, /* bungtr_d */ + 16, /* bungti_d */ + 16, /* bltgtr_d */ + 24, /* bltgti_d */ + 8, /* bordr_d */ + 16, /* bordi_d */ + 8, /* bunordr_d */ + 16, /* bunordi_d */ + 0, /* pushargr_d */ + 0, /* pushargi_d */ + 0, /* retr_d */ + 0, /* reti_d */ + 0, /* retval_d */ + 0, /* movr_w_f */ + 0, /* movr_ww_d */ + 0, /* movr_w_d */ + 0, /* movr_f_w */ + 0, /* movi_f_w */ + 0, /* movr_d_ww */ + 0, /* movi_d_ww */ + 0, /* movr_d_w */ + 0, /* movi_d_w */ + 8, /* clo */ + 4, /* clz */ + 12, /* cto */ + 8, /* ctz */ +# endif #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_aarch64.c b/deps/lightning/lib/jit_aarch64.c index b54d0070..243e677c 100644 --- a/deps/lightning/lib/jit_aarch64.c +++ b/deps/lightning/lib/jit_aarch64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -17,9 +17,16 @@ * Paulo Cesar Pereira de Andrade */ +/* callee save + * align16(lr+fp+x19+x2[0-8]+v8+v9+v1[0-15]) */ +#define stack_framesize 160 + #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) #define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +#if __APPLE__ +typedef jit_pointer_t jit_va_list_t; +#else typedef struct jit_qreg { jit_float64_t l; jit_float64_t h; @@ -52,10 +59,13 @@ typedef struct jit_va_list { jit_qreg_t q6; jit_qreg_t q7; } jit_va_list_t; +#endif /* * Prototypes */ +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); @@ -72,7 +82,11 @@ extern void __clear_cache(void *, void *); */ jit_register_t _rvs[] = { { rc(gpr) | 0x08, "x8" }, +#if __APPLE__ + { 0x12, "x18" }, +#else { rc(gpr) | 0x12, "x18" }, +#endif { rc(gpr) | 0x11, "x17" }, { rc(gpr) | 0x10, "x16" }, { rc(gpr) | 0x09, "x9" }, @@ -138,6 +152,14 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { + _R19, _R20, _R21, _R22, _R23, _R24, _R25, _R26, _R27, _R28 +}; + +static jit_int32_t fregs[] = { + _V8, _V9, _V10, _V11, _V12, _V13, _V14, _V15 +}; + /* * Implementation */ @@ -198,6 +220,7 @@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + jit_check_frame(); switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -258,20 +281,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); 
jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -331,7 +352,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -341,6 +362,7 @@ void _jit_ellipsis(jit_state_t *_jit) { jit_inc_synth(ellipsis); + jit_check_frame(); if (_jitc->prepare) { jit_link_prepare(); assert(!(_jitc->function->call.call & jit_call_varargs)); @@ -351,6 +373,7 @@ _jit_ellipsis(jit_state_t *_jit) assert(!(_jitc->function->self.call & jit_call_varargs)); _jitc->function->self.call |= jit_call_varargs; +#if !__APPLE_ /* Allocate va_list like object in the stack, * with enough space to save all argument * registers, and use fixed offsets for them. */ @@ -367,6 +390,7 @@ _jit_ellipsis(jit_state_t *_jit) _jitc->function->vafp = (8 - _jitc->function->self.argf) * -16; else _jitc->function->vafp = 0; +#endif } jit_dec_synth(); } @@ -380,7 +404,7 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; @@ -389,10 +413,22 @@ _jit_arg(jit_state_t *_jit) if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { +#if PACKED_STACK || STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif +#if PACKED_STACK + _jitc->function->self.size += + _jitc->function->self.size & ((1 << (code - jit_code_arg_c)) - 1); +#endif offset = _jitc->function->self.size; +#if PACKED_STACK + _jitc->function->self.size += 1 << (code - jit_code_arg_c); +#else _jitc->function->self.size += sizeof(jit_word_t); +#endif + jit_check_frame(); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); 
return (node); @@ -408,8 +444,17 @@ _jit_arg_f(jit_state_t *_jit) if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; else { +#if PACKED_STACK + _jitc->function->self.size += + _jitc->function->self.size & (sizeof(jit_float32_t) - 1); +#endif offset = _jitc->function->self.size; +#if PACKED_STACK + _jitc->function->self.size += sizeof(jit_float32_t); +#else _jitc->function->self.size += sizeof(jit_word_t); +#endif + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); @@ -427,8 +472,13 @@ _jit_arg_d(jit_state_t *_jit) if (jit_arg_f_reg_p(_jitc->function->self.argf)) offset = _jitc->function->self.argf++; else { +#if PACKED_STACK + _jitc->function->self.size += + _jitc->function->self.size & (sizeof(jit_float64_t) - 1); +#endif offset = _jitc->function->self.size; - _jitc->function->self.size += sizeof(jit_word_t); + _jitc->function->self.size += sizeof(jit_float64_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); @@ -439,111 +489,235 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_c(u, JIT_RA0 - v->u.w); - else - jit_ldxi_c(u, JIT_FP, v->u.w); +#endif + } + else { + jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_uc(u, JIT_RA0 - v->u.w); - else - jit_ldxi_uc(u, JIT_FP, v->u.w); +#endif + 
} + else { + jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_s(u, JIT_RA0 - v->u.w); - else - jit_ldxi_s(u, JIT_FP, v->u.w); +#endif + } + else { + jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_us(u, JIT_RA0 - v->u.w); - else - jit_ldxi_us(u, JIT_FP, v->u.w); +#endif + } + else { + jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK || __WORDSIZE == 32 + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_i(u, JIT_RA0 - v->u.w); - else - jit_ldxi_i(u, JIT_FP, v->u.w); +#endif + } + else { + jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } +#if __WORDSIZE == 64 void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_reg_p(v->u.w)) { +#if PACKED_STACK + jit_movr(u, JIT_RA0 - v->u.w); +#else jit_extr_ui(u, JIT_RA0 - v->u.w); - else - 
jit_ldxi_ui(u, JIT_FP, v->u.w); +#endif + } + else { + jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); - else - jit_ldxi_l(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } +#endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); - if (jit_arg_reg_p(v->u.w)) - jit_movr(JIT_RA0 - v->u.w, u); - else - jit_stxi(v->u.w, JIT_FP, u); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); + if (jit_arg_reg_p(v->u.w)) { + jit_int32_t regno = JIT_RA0 - v->u.w; +#if PACKED_STACK + switch (code) { + case jit_code_putargr_c: jit_extr_c(regno, u); break; + case jit_code_putargr_uc: jit_extr_uc(regno, u); break; + case jit_code_putargr_s: jit_extr_s(regno, u); break; + case jit_code_putargr_us: jit_extr_us(regno, u); break; +# if __WORDISZE == 32 + case jit_code_putargr_i: jit_movr(regno, u); break; +# else + case jit_code_putargr_i: jit_extr_i(regno, u); break; + case jit_code_putargr_ui: jit_extr_ui(regno, u); break; + case jit_code_putargr_l: jit_movr(regno, u); break; +# endif + default: abort(); break; + } +#else + jit_movr(regno, u); +#endif + } + else { + jit_node_t *node; +#if PACKED_STACK + switch (code) { + case jit_code_putargr_c: case jit_code_putargr_uc: + node = jit_stxi_c(v->u.w, JIT_FP, u); break; + case jit_code_putargr_s: case jit_code_putargr_us: + node = jit_stxi_s(v->u.w, JIT_FP, u); break; +# if __WORDSIZE == 32 + case jit_code_putargr_i: + node = jit_stxi(v->u.w, JIT_FP, u); break; +# else + case 
jit_code_putargr_i: case jit_code_putargr_ui: + node = jit_stxi_i(v->u.w, JIT_FP, u); break; + case jit_code_putargr_l: + node = jit_stxi(v->u.w, JIT_FP, u); break; +# endif + default: abort(); break; + } +#else + node = jit_stxi(v->u.w, JIT_FP, u); +#endif + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); +#if PACKED_STACK + switch (code) { + case jit_code_putargi_c: u = (jit_int8_t)u; break; + case jit_code_putargi_uc: u = (jit_uint8_t)u; break; + case jit_code_putargi_s: u = (jit_int16_t)u; break; + case jit_code_putargi_us: u = (jit_uint16_t)u; break; +# if __WORDSIZE == 32 + case jit_code_putargi_i: break; +# else + case jit_code_putargi_i: u = (jit_int32_t)u; break; + case jit_code_putargi_ui: u = (jit_uint32_t)u; break; + case jit_code_putargi_l: break; +# endif + default: abort(); break; + } +#endif if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w, JIT_FP, regno); +#if PACKED_STACK + switch (code) { + case jit_code_putargi_c: case jit_code_putargi_uc: + node = jit_stxi_c(v->u.w, JIT_FP, regno); break; + case jit_code_putargi_s: case jit_code_putargi_us: + node = jit_stxi_s(v->u.w, JIT_FP, regno); break; +# if __WORDSIZE == 32 + case jit_code_putargi_i: + node = jit_stxi(v->u.w, JIT_FP, regno); break; +# else + case jit_code_putargi_i: case jit_code_putargi_ui: + node = jit_stxi_i(v->u.w, JIT_FP, regno); break; + case jit_code_putargi_l: + node = jit_stxi(v->u.w, JIT_FP, regno); break; +# endif + default: abort(); break; + } +#else + node = jit_stxi(v->u.w, JIT_FP, regno); +#endif + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ 
-556,8 +730,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_inc_synth_wp(getarg_f, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr_f(u, JIT_FA0 - v->u.w); - else - jit_ldxi_f(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -568,8 +744,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_inc_synth_wp(putargr_f, u, v); if (jit_arg_f_reg_p(v->u.w)) jit_movr_f(JIT_FA0 - v->u.w, u); - else - jit_stxi_f(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -582,9 +760,11 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) if (jit_arg_f_reg_p(v->u.w)) jit_movi_f(JIT_FA0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, JIT_FP, regno); + node = jit_stxi_f(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -597,8 +777,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_inc_synth_wp(getarg_d, u, v); if (jit_arg_f_reg_p(v->u.w)) jit_movr_d(u, JIT_FA0 - v->u.w); - else - jit_ldxi_d(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -609,8 +791,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_inc_synth_wp(putargr_d, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr_d(JIT_FA0 - v->u.w, u); - else - jit_stxi_d(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -623,48 +807,161 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movi_d(JIT_FA0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, JIT_FP, regno); + node = jit_stxi_d(v->u.w, JIT_FP, 
regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movr(JIT_RA0 - _jitc->function->call.argi, u); + jit_int32_t regno = JIT_RA0 - _jitc->function->call.argi; +#if PACKED_STACK + switch (code) { + case jit_code_pushargr_c: jit_extr_c(regno, u); break; + case jit_code_pushargr_uc: jit_extr_uc(regno, u); break; + case jit_code_pushargr_s: jit_extr_s(regno, u); break; + case jit_code_pushargr_us: jit_extr_us(regno, u); break; +# if __WORDSIZE == 32 + case jit_code_pushargr_i: jit_movr(regno, u); break; +# else + case jit_code_pushargr_i: jit_extr_i(regno, u); break; + case jit_code_pushargr_ui: jit_extr_ui(regno, u); break; + case jit_code_pushargr_l: jit_movr(regno, u); break; +# endif + default: abort(); break; + } +#else + jit_movr(regno, u); +#endif +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(code == jit_code_pushargr); + jit_stxi(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argi; } else { +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & + ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1); + switch (code) { + case jit_code_pushargr_c: case jit_code_pushargr_uc: + jit_stxi_c(_jitc->function->call.size, JIT_SP, u); + break; + case jit_code_pushargr_s: case jit_code_pushargr_us: + jit_stxi_s(_jitc->function->call.size, JIT_SP, u); + break; +# if __WORDSIZE == 32 + case jit_code_pushargr_i: + jit_stxi(_jitc->function->call.size, JIT_SP, u); + break; +# else + case jit_code_pushargr_i: case jit_code_pushargr_ui: + jit_stxi_i(_jitc->function->call.size, JIT_SP, u); + break; + case jit_code_pushargr_l: +
jit_stxi(_jitc->function->call.size, JIT_SP, u); + break; +# endif + default: + abort(); + break; + } + _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2); +#else jit_stxi(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); +#endif + jit_check_frame(); } jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); +#if PACKED_STACK + switch (code) { + case jit_code_pushargi_c: u = (jit_int8_t)u; break; + case jit_code_pushargi_uc: u = (jit_uint8_t)u; break; + case jit_code_pushargi_s: u = (jit_int16_t)u; break; + case jit_code_pushargi_us: u = (jit_uint16_t)u; break; +# if __WORDSIZE == 32 + case jit_code_pushargi_i: break; +# else + case jit_code_pushargi_i: u = (jit_int32_t)u; break; + case jit_code_pushargi_ui: u = (jit_uint32_t)u; break; + case jit_code_pushargi_l: break; +# endif + default: abort(); break; + } +#endif if (jit_arg_reg_p(_jitc->function->call.argi)) { - jit_movi(JIT_RA0 - _jitc->function->call.argi, u); + regno = JIT_RA0 - _jitc->function->call.argi; + jit_movi(regno, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(code == jit_code_pushargi); + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argi; } else { regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & + ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1); + switch (code) { + case jit_code_pushargi_c: case jit_code_pushargi_uc: + jit_stxi_c(_jitc->function->call.size, JIT_SP, regno); + break; + case jit_code_pushargi_s: case jit_code_pushargi_us: + jit_stxi_s(_jitc->function->call.size, JIT_SP, regno); + break; +# if 
__WORDSIZE == 32 + case jit_code_pushargi_i: + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + break; +# else + case jit_code_pushargi_i: case jit_code_pushargi_ui: + jit_stxi_i(_jitc->function->call.size, JIT_SP, regno); + break; + case jit_code_pushargi_l: + jit_stxi(_jitc->function->call.size, JIT_SP, regno); + break; +# endif + default: + abort(); + break; + } + _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2); +#else jit_stxi(_jitc->function->call.size, JIT_SP, regno); - jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); +#endif + jit_unget_reg(regno); + jit_check_frame(); } jit_dec_synth(); } @@ -677,11 +974,27 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float32_t) == sizeof(jit_word_t)); + jit_stxi_f(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argf; } else { +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float32_t) - 1); + jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + _jitc->function->call.size += sizeof(jit_float32_t); +#else jit_stxi_f(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); +#endif + jit_check_frame(); } jit_dec_synth(); } @@ -695,14 +1008,30 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float32_t) == sizeof(jit_word_t)); + jit_stxi_f(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += 
sizeof(jit_word_t); + } +#endif ++_jitc->function->call.argf; } else { regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float32_t) - 1); + jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); + _jitc->function->call.size += sizeof(jit_float32_t); +#else jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); - jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); +#endif + jit_unget_reg(regno); + jit_check_frame(); } jit_dec_synth(); } @@ -715,11 +1044,24 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float64_t) == sizeof(jit_word_t)); + jit_stxi_d(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_float64_t); + } +#endif ++_jitc->function->call.argf; } else { +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float64_t) - 1); +#endif jit_stxi_d(_jitc->function->call.size, JIT_SP, u); - _jitc->function->call.size += sizeof(jit_word_t); + _jitc->function->call.size += sizeof(jit_float64_t); + jit_check_frame(); } jit_dec_synth(); } @@ -733,14 +1075,27 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_link_prepare(); if (jit_arg_f_reg_p(_jitc->function->call.argf)) { jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u); +#if __APPLE__ + if (_jitc->function->call.call & jit_call_varargs) { + assert(sizeof(jit_float64_t) == sizeof(jit_word_t)); + jit_stxi_d(_jitc->function->call.size, JIT_SP, + JIT_FA0 - _jitc->function->call.argf); + _jitc->function->call.size += sizeof(jit_float64_t); + } +#endif ++_jitc->function->call.argf; } else { regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); +#if PACKED_STACK + 
_jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_float64_t) - 1); +#endif jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); - _jitc->function->call.size += sizeof(jit_word_t); + _jitc->function->call.size += sizeof(jit_float64_t); + jit_check_frame(); } jit_dec_synth(); } @@ -769,7 +1124,12 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishr, r0); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_word_t) - 1); +#endif if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; node = jit_callr(r0); @@ -786,7 +1146,12 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishi, (jit_word_t)i0); +#if PACKED_STACK + _jitc->function->call.size += + _jitc->function->call.size & (sizeof(jit_word_t) - 1); +#endif if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; node = jit_calli(i0); @@ -835,10 +1200,15 @@ void _jit_retval_i(jit_state_t *_jit, jit_int32_t r0) { jit_inc_synth_w(retval_i, r0); +#if __WORDSIZE == 32 + jit_movr(r0, JIT_RET); +#else jit_extr_i(r0, JIT_RET); +#endif jit_dec_synth(); } +#if __WORDSIZE == 64 void _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0) { @@ -851,10 +1221,10 @@ void _jit_retval_l(jit_state_t *_jit, jit_int32_t r0) { jit_inc_synth_w(retval_l, r0); - if (r0 != JIT_RET) - jit_movr(r0, JIT_RET); + jit_movr(r0, JIT_RET); jit_dec_synth(); } +#endif void _jit_retval_f(jit_state_t *_jit, jit_int32_t r0) @@ -886,6 +1256,7 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1023,6 +1394,9 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & 
(node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1070,6 +1444,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1391,6 +1769,7 @@ _emit_code(jit_state_t *_jit) case_brr(bunord, _d); case_brd(bunord); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); break; case jit_code_jmpi: @@ -1401,17 +1780,26 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s26_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); jmpi(node->u.w); + } break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: + jit_check_frame(); if (node->flag & jit_flag_node) { temp = node->u.n; assert(temp->code == jit_code_label || @@ -1419,7 +1807,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s26_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); } } @@ -1430,11 +1823,14 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif undo.patch_offset = _jitc->patches.offset; restart_function: + compute_framesize(); + patch_alist(0); _jitc->again = 0; prolog(node); break; @@ -1450,10 +1846,22 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* 
undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif _jitc->patches.offset = undo.patch_offset; + patch_alist(1); goto restart_function; } /* remember label is defined */ @@ -1474,11 +1882,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1486,16 +1906,34 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case 
jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: case jit_code_retval_s: case jit_code_retval_us: case jit_code_retval_i: +#if __WORDSIZE == 64 case jit_code_retval_ui: case jit_code_retval_l: +#endif case jit_code_retval_f: case jit_code_retval_d: case jit_code_prepare: case jit_code_finishr: case jit_code_finishi: @@ -1571,6 +2009,23 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) stxi_d(i0, rn(r0), rn(r1)); } +static void +_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + _jitc->framesize = 16; /* ra+fp */ + for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += sizeof(jit_word_t); + + for (reg = 0; reg < jit_size(fregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) + _jitc->framesize += sizeof(jit_float64_t); + + /* Make sure functions called have a 16 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 15) & -16; +} + static void _patch(jit_state_t *_jit, jit_word_t 
instr, jit_node_t *node) { diff --git a/deps/lightning/lib/jit_alpha-cpu.c b/deps/lightning/lib/jit_alpha-cpu.c index 40f31267..91d15c8c 100644 --- a/deps/lightning/lib/jit_alpha-cpu.c +++ b/deps/lightning/lib/jit_alpha-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2022 Free Software Foundation, Inc. + * Copyright (C) 2014-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -320,6 +320,12 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEGQ(r1,r0) # define comr(r0,r1) NOT(r1,r0) +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) CTLZ(r1, r0) +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) CTTZ(r1, r0) # define addr(r0,r1,r2) ADDQ(r1,r2,r0) # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -637,7 +643,7 @@ static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); # define jmpr(r0) JMP(_R31_REGNO,r0,0) # define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*, jit_word_t); +static jit_word_t _jmpi(jit_state_t*, jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*, jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -825,7 +831,7 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, } again = _jit->pc.w; /* AGAIN */ LDQ_L(r0, r1, 0); /* Load r0 locked */ - jump0 = bner(0, r0, r2); /* bne FAIL r0 r2 */ + jump0 = bner(_jit->pc.w, r0, r2); /* bne FAIL r0 r2 */ movr(r0, r3); /* Move to r0 to attempt to store */ STQ_C(r0, r1, 0); /* r0 is an in/out argument */ jump1 = _jit->pc.w; @@ -840,6 +846,20 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_unget_reg(r1_reg); } +static void +_clor(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + clzr(r0, r0); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + ctzr(r0, r0); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2543,7 +2563,7 @@ _bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(t0); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { jit_word_t w; @@ -2553,7 +2573,8 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) if (_s21_p(d)) BR(_R31_REGNO, d); else - (void)jmpi_p(i0); + w = jmpi_p(i0); + return (w); } static jit_word_t diff --git a/deps/lightning/lib/jit_alpha-fpu.c b/deps/lightning/lib/jit_alpha-fpu.c index 5452a1ea..83736b77 100644 --- a/deps/lightning/lib/jit_alpha-fpu.c +++ b/deps/lightning/lib/jit_alpha-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2022 Free Software Foundation, Inc. + * Copyright (C) 2014-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_alpha-sz.c b/deps/lightning/lib/jit_alpha-sz.c index ac314f27..14949b25 100644 --- a/deps/lightning/lib/jit_alpha-sz.c +++ b/deps/lightning/lib/jit_alpha-sz.c @@ -1,10 +1,11 @@ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 168 +#define JIT_INSTR_MAX 88 0, /* data */ 0, /* live */ - 4, /* align */ + 12, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,8 +25,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 20, /* va_start */ 24, /* va_arg */ 44, /* va_arg_d */ @@ -96,12 +112,17 @@ 32, /* movi */ 4, /* movnr */ 4, /* movzr */ + 32, /* casr */ + 60, /* casi */ 8, /* extr_c */ 8, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 8, /* extr_i */ 8, /* extr_ui */ + 16, /* bswapr_us */ + 36, /* bswapr_ui */ + 36, /* bswapr_ul */ 16, /* htonr_us */ 36, /* htonr_ui */ 36, /* htonr_ul */ @@ -190,17 +211,41 @@ 16, /* bxsubr_u */ 16, /* bxsubi_u */ 4, /* jmpr */ - 36, /* jmpi */ + 4, /* jmpi */ 8, /* callr */ 36, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, 
/* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -400,9 +445,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 16, /* bswapr_us */ - 36, /* bswapr_ui */ - 36, /* bswapr_ul */ - 32, /* casr */ - 60, /* casi */ + 8, /* clo */ + 4, /* clz */ + 8, /* cto */ + 4, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_alpha.c b/deps/lightning/lib/jit_alpha.c index 678d5c6e..25566f41 100644 --- a/deps/lightning/lib/jit_alpha.c +++ b/deps/lightning/lib/jit_alpha.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2022 Free Software Foundation, Inc. + * Copyright (C) 2014-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -246,20 +246,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -319,7 +317,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -361,18 +359,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) 
+_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function != NULL); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += 8; } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -417,7 +419,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _A0 - v->u.w); @@ -429,7 +431,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _A0 - v->u.w); @@ -441,7 +443,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _A0 - v->u.w); @@ -453,7 +455,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _A0 - v->u.w); @@ -465,7 +467,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t 
*v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, _A0 - v->u.w); @@ -477,7 +479,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _A0 - v->u.w); @@ -489,7 +491,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _A0 - v->u.w); @@ -499,10 +501,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_A0 - v->u.w, u); else @@ -511,11 +513,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_A0 - v->u.w, u); else { @@ -610,10 +612,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function != NULL); - jit_inc_synth_w(pushargr, u); + 
jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_A0 - _jitc->function->call.argi, u); @@ -627,11 +629,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_int64_t u) +_jit_pushargi(jit_state_t *_jit, jit_int64_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function != NULL); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_A0 - _jitc->function->call.argi, u); @@ -863,6 +865,7 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -983,6 +986,9 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1124,6 +1130,10 @@ _emit_code(jit_state_t *_jit) break; case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); @@ -1361,7 +1371,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (_s21_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1390,6 +1405,7 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1410,6 +1426,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further 
updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1434,11 +1460,18 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1446,10 +1479,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case 
jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_arm-cpu.c b/deps/lightning/lib/jit_arm-cpu.c index 12f9a2f7..a0852a2c 100644 --- a/deps/lightning/lib/jit_arm-cpu.c +++ b/deps/lightning/lib/jit_arm-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -37,8 +37,7 @@ # define jit_armv5e_p() (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend)) # define jit_armv6_p() (jit_cpu.version >= 6) # define jit_armv7_p() (jit_cpu.version >= 7) -# define jit_armv7r_p() 0 -# define stack_framesize 48 +# define jit_armv7r_p() (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend)) extern int __aeabi_idivmod(int, int); extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define _R0_REGNO 0x00 @@ -141,8 +140,12 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define THUMB2_UMULL 0xfba00000 # define ARM_SMULL 0x00c00090 # define THUMB2_SMULL 0xfb800000 +/* >> ARMv7r */ +# define ARM_SDIV 0x07100010 +# define ARM_UDIV 0x07300010 # define THUMB2_SDIV 0xfb90f0f0 # define THUMB2_UDIV 0xfbb0f0f0 +/* << ARMv7r */ # define ARM_AND 0x00000000 # define THUMB_AND 0x4000 # define THUMB2_AND 0xea000000 @@ -185,6 +188,12 @@ extern unsigned __aeabi_uidivmod(unsigned, unsigned); # define ARM_STREX 0x01800090 # define THUMB2_STREX 0xe8400000 /* << 
ARMv6* */ +/* >> ARMv6t2 */ +# define THUMB2_CLZ 0xfab0f080 +# define THUMB2_RBIT 0xfa90f0a0 +# define ARM_RBIT 0x06f00030 +/* << ARMv6t2 */ +# define ARM_CLZ 0x01600010 /* >> ARMv7 */ # define ARM_DMB 0xf57ff050 # define THUMB2_DMB 0xf3bf8f50 @@ -447,6 +456,12 @@ static void _tdmb(jit_state_t *_jit, int im); # define NOT(rd,rm) CC_NOT(ARM_CC_AL,rd,rm) # define T1_NOT(rd,rm) T1_MVN(rd,rm) # define T2_NOT(rd,rm) T2_MVN(rd,rm) +# define T2_CLZ(rd,rm) torrr(THUMB2_CLZ,rm,rd,rm) +# define CC_CLZ(cc,rd,rm) corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm) +# define CLZ(rd,rm) CC_CLZ(ARM_CC_AL,rd,rm) +# define T2_RBIT(rd,rm) torrr(THUMB2_RBIT,rm,rd,rm) +# define CC_RBIT(cc,rd,rm) corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm) +# define RBIT(rd,rm) CC_RBIT(ARM_CC_AL,rd,rm) # define NOP() MOV(_R0_REGNO, _R0_REGNO) # define T1_NOP() is(0xbf00) # define CC_ADD(cc,rd,rn,rm) corrr(cc,ARM_ADD,rn,rd,rm) @@ -524,6 +539,10 @@ static void _tdmb(jit_state_t *_jit, int im); # define CC_UMULL(cc,rl,rh,rn,rm) corrrr(cc,ARM_UMULL,rh,rl,rm,rn) # define UMULL(rl,rh,rn,rm) CC_UMULL(ARM_CC_AL,rl,rh,rn,rm) # define T2_UMULL(rl,rh,rn,rm) torrrr(THUMB2_UMULL,rn,rl,rh,rm) +# define CC_SDIV(cc,rd,rn,rm) corrrr(cc,ARM_SDIV,rd,15,rn,rm) +# define SDIV(rd,rn,rm) CC_SDIV(ARM_CC_AL,rd,rm,rn) +# define CC_UDIV(cc,rd,rn,rm) corrrr(cc,ARM_UDIV,rd,15,rn,rm) +# define UDIV(rd,rn,rm) CC_UDIV(ARM_CC_AL,rd,rm,rn) # define T2_SDIV(rd,rn,rm) torrr(THUMB2_SDIV,rn,rd,rm) # define T2_UDIV(rd,rn,rm) torrr(THUMB2_UDIV,rn,rd,rm) # define CC_AND(cc,rd,rn,rm) corrr(cc,ARM_AND,rn,rd,rm) @@ -852,6 +871,8 @@ static void _tdmb(jit_state_t *_jit, int im); # define T2_POP(im) tpp(THUMB2_POP,im) # define jit_get_reg_args() \ do { \ + CHECK_REG_ARGS(); \ + jit_check_frame(); \ (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr); \ (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr); \ (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr); \ @@ -885,6 +906,14 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, 
static void _comr(jit_state_t*,jit_int32_t,jit_int32_t); # define negr(r0,r1) _negr(_jit,r0,r1) static void _negr(jit_state_t*,jit_int32_t,jit_int32_t); +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define addr(r0,r1,r2) _addr(_jit,r0,r1,r2) static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) @@ -1147,10 +1176,10 @@ static void _prolog(jit_state_t*,jit_node_t*); static void _epilog(jit_state_t*,jit_node_t*); # define callr(r0) _callr(_jit,r0) static void _callr(jit_state_t*,jit_int32_t); -# define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); -# define calli_p(i0) _calli_p(_jit,i0) -static jit_word_t _calli_p(jit_state_t*,jit_word_t); +# define calli(i0,i1) _calli(_jit,i0,i1) +static void _calli(jit_state_t*,jit_word_t,jit_bool_t); +# define calli_p(i0,i1) _calli_p(_jit,i0,i1) +static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t); # define vastart(r0) _vastart(_jit, r0) static void _vastart(jit_state_t*, jit_int32_t); # define vaarg(r0, r1) _vaarg(_jit, r0, r1) @@ -1526,7 +1555,7 @@ _tpp(jit_state_t *_jit, int o, int im) assert(!(o & 0x0000ffff)); if (o == THUMB2_PUSH) assert(!(im & 0x8000)); - assert(__builtin_popcount(im & 0x1fff) > 1); + assert(__builtin_popcount(im & 0x7fff) > 1); thumb.i = o|im; iss(thumb.s[0], thumb.s[1]); } @@ -1737,6 +1766,53 @@ _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) RSBI(r0, r1, 0); } +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (!jit_thumb_p() && jit_armv5e_p()) + CLZ(r0, r1); + else if (jit_thumb_p() && jit_armv7_p()) { /* 
armv6t2 actually */ + T2_CLZ(r0, r1); + } + else + fallback_clz(r0, r1); /* software path must read the source r1, not the stale destination */ +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + clzr(r0, r0); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_armv7_p()) { /* armv6t2 actually */ + if (jit_thumb_p()) + T2_RBIT(r0, r1); + else + RBIT(r0, r1); + clor(r0, r0); + } + else + fallback_cto(r0, r1); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_armv7_p()) { /* armv6t2 actually */ + if (jit_thumb_p()) + T2_RBIT(r0, r1); + else + RBIT(r0, r1); + clzr(r0, r0); + } + else + fallback_ctz(r0, r1); +} + static void _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2232,8 +2308,12 @@ _divrem(jit_state_t *_jit, int div, int sign, static void _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - if (jit_armv7r_p() && jit_thumb_p()) - T2_SDIV(r0, r1, r2); + if (jit_armv7r_p()) { + if (jit_thumb_p()) + T2_SDIV(r0, r1, r2); + else + SDIV(r0, r1, r2); + } else divrem(1, 1, r0, r1, r2); } @@ -2251,8 +2331,12 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - if (jit_armv7r_p() && jit_thumb_p()) - T2_UDIV(r0, r1, r2); + if (jit_armv7r_p()) { + if (jit_thumb_p()) + T2_UDIV(r0, r1, r2); + else + UDIV(r0, r1, r2); + } else divrem(1, 0, r0, r1, r2); } @@ -2312,7 +2396,23 @@ _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, static void _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - divrem(0, 1, r0, r1, r2); + if (jit_armv7r_p()) { + jit_int32_t reg; + if (r0 == r1 || r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + divr(rn(reg), r1, r2); + mulr(rn(reg), r2, rn(reg)); + subr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + else { + divr(r0, r1, r2); + mulr(r0, r2, r0); + subr(r0, r1, r0); + } + } + else + divrem(0, 1, r0, 
r1, r2); } static void @@ -2328,7 +2428,23 @@ _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - divrem(0, 0, r0, r1, r2); + if (jit_armv7r_p()) { + jit_int32_t reg; + if (r0 == r1 || r0 == r2) { + reg = jit_get_reg(jit_class_gpr); + divr_u(rn(reg), r1, r2); + mulr(rn(reg), r2, rn(reg)); + subr(r0, r1, rn(reg)); + jit_unget_reg(reg); + } + else { + divr_u(r0, r1, r2); + mulr(r0, r2, r0); + subr(r0, r1, r0); + } + } + else + divrem(0, 0, r0, r1, r2); } static void @@ -2741,8 +2857,8 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1) jit_word_t w; jit_word_t d; jit_int32_t reg; + /* i1 means jump is reachable in signed 24 bits */ if (i1) { - /* Assume jump is not longer than 23 bits if inside jit */ w = _jit->pc.w; /* if thumb and in thumb mode */ if (jit_thumb_p() && _jitc->thumb) { @@ -3835,14 +3951,29 @@ _callr(jit_state_t *_jit, jit_int32_t r0) } static void -_calli(jit_state_t *_jit, jit_word_t i0) +_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p) { jit_word_t d; jit_int32_t reg; - d = ((i0 - _jit->pc.w) >> 2) - 2; - if (!jit_exchange_p() && !jit_thumb_p() && _s24P(d)) - BLI(d & 0x00ffffff); + if (!exchange_p) { + if (jit_thumb_p()) { + if (jit_exchange_p()) + /* skip switch from arm to thumb + * exchange_p set to zero means a jit function + * call in the same jit code buffer */ + d = ((i0 + 8 - _jit->pc.w) >> 1) - 2; + else + d = ((i0 - _jit->pc.w) >> 1) - 2; + } + else d = ((i0 - _jit->pc.w) >> 2) - 2; + if (_s24P(d)) { + if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d)); + else BLI(d & 0x00ffffff); + } + else goto fallback; + } else { + fallback: reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); if (jit_thumb_p()) @@ -3854,28 +3985,44 @@ _calli(jit_state_t *_jit, jit_word_t i0) } static jit_word_t -_calli_p(jit_state_t *_jit, jit_word_t i0) +_calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1) { jit_word_t w; + 
jit_word_t d; jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - w = _jit->pc.w; - movi_p(rn(reg), i0); - if (jit_thumb_p()) - T1_BLX(rn(reg)); - else - BLX(rn(reg)); - jit_unget_reg(reg); + /* i1 means call is reachable in signed 24 bits */ + if (i1) { + w = _jit->pc.w; + if (jit_thumb_p()) d = ((i0 - _jit->pc.w) >> 1) - 2; + else d = ((i0 - _jit->pc.w) >> 2) - 2; + assert(_s24P(d)); + if (jit_thumb_p()) T2_BLI(encode_thumb_jump(d)); + else BLI(d & 0x00ffffff); + } + else { + reg = jit_get_reg(jit_class_gpr); + w = _jit->pc.w; + movi_p(rn(reg), i0); + if (jit_thumb_p()) + T1_BLX(rn(reg)); + else + BLX(rn(reg)); + jit_unget_reg(reg); + } return (w); } static void _prolog(jit_state_t *_jit, jit_node_t *node) { - jit_int32_t reg; + jit_int32_t reg, mask, count; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); + if (jit_swf_p()) + CHECK_SWF_OFFSET(); + CHECK_REG_ARGS(); if (_jitc->function->assume_frame) { if (jit_thumb_p() && !_jitc->thumb) _jitc->thumb = _jit->pc.w; @@ -3888,38 +4035,66 @@ _prolog(jit_state_t *_jit, jit_node_t *node) _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; + /* If this jit_check_frame() succeeds, it actually is just a need_stack, + * usually for arguments, so, allocai was not called, but pusharg* + * was called increasing stack size, for negative access offsets. + * This can be optimized for one less prolog instruction, that is, + * do not create the frame pointer, and only add _jitc->function->stack + * to sp, and on epilog, instead of moving fp to sp, just add negative + * value of _jitc->function->stack. Since this condition requires a + * large function body for excess arguments to called function, keep + * things a bit simpler for now, as this is the only place need_stack + * would be useful. 
*/ + if (_jitc->function->stack) + jit_check_frame(); + + for (reg = mask = count = 0; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + mask |= 1 << rn(iregs[reg]); + ++count; + } + } + /* One extra register to keep stack 8 bytes aligned */ + if (count & 1) { + for (reg = 4; reg < 10; reg++) { + if (!(mask & (1 << reg))) { + mask |= 1 << reg; + break; + } + } + } + if (_jitc->function->need_frame || _jitc->function->need_return) + mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO); + if (!jit_swf_p() && _jitc->function->save_reg_args && + !(_jitc->function->self.call & jit_call_varargs)) + mask |= 0xf; if (jit_thumb_p()) { /* switch to thumb mode (better approach would be to * ORR 1 address being called, but no clear distinction * of what is a pointer to a jit function, or if patching * a pointer to a jit function) */ - ADDI(_R12_REGNO, _R15_REGNO, 1); - BX(_R12_REGNO); + if (jit_exchange_p()) { + ADDI(_R12_REGNO, _R15_REGNO, 1); + BX(_R12_REGNO); + } if (!_jitc->thumb) _jitc->thumb = _jit->pc.w; - if (jit_cpu.abi) { - T2_PUSH(0xf); - T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO)); - VPUSH_F64(_D8_REGNO, 8); - } - else { + if (jit_swf_p() || (_jitc->function->save_reg_args && + (_jitc->function->self.call & jit_call_varargs))) T2_PUSH(0xf); - T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO)); - } + if (mask) + T2_PUSH(mask); } else { - if (jit_cpu.abi) { - PUSH(0xf); - PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO)); - VPUSH_F64(_D8_REGNO, 8); - } - else { + if (jit_swf_p() || (_jitc->function->save_reg_args && + (_jitc->function->self.call & jit_call_varargs))) PUSH(0xf); - PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO)); - } + if (mask) + PUSH(mask); } - movr(_FP_REGNO, _SP_REGNO); + if (_jitc->function->need_frame) + movr(_FP_REGNO, _SP_REGNO); if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); if (_jitc->function->allocar) { @@ -3933,17 +4108,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void 
_epilog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg, mask, count; if (_jitc->function->assume_frame) return; - movr(_SP_REGNO, _FP_REGNO); - if (jit_cpu.abi) - VPOP_F64(_D8_REGNO, 8); - if (jit_thumb_p()) - T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO)); - else - POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO)); - addi(_SP_REGNO, _SP_REGNO, 16); + for (reg = mask = count = 0; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + mask |= 1 << rn(iregs[reg]); + ++count; + } + } + /* One extra register to keep stack 8 bytes aligned */ + if (count & 1) { + for (reg = 4; reg < 10; reg++) { + if (!(mask & (1 << reg))) { + mask |= 1 << reg; + break; + } + } + } + if (_jitc->function->need_frame || _jitc->function->need_return) + mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO); + if (_jitc->function->need_frame) + movr(_SP_REGNO, _FP_REGNO); + if (!jit_swf_p() && _jitc->function->save_reg_args && + !(_jitc->function->self.call & jit_call_varargs)) + addi(_SP_REGNO, _SP_REGNO, 16); + if (mask) { + if (jit_thumb_p()) + T2_POP(mask); + else + POP(mask); + } + if (jit_swf_p() || (_jitc->function->save_reg_args && + (_jitc->function->self.call & jit_call_varargs))) + addi(_SP_REGNO, _SP_REGNO, 16); if (jit_thumb_p()) T1_BX(_LR_REGNO); else @@ -3961,8 +4160,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) * The -16 is to account for the 4 argument registers * always saved, and _jitc->function->vagp is to account * for declared arguments. 
*/ - addi(r0, _FP_REGNO, _jitc->function->self.size - - 16 + _jitc->function->vagp); + addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp); } static void @@ -3989,7 +4187,28 @@ _patch_at(jit_state_t *_jit, jit_word_t w; } u; u.w = instr; - if (kind == arm_patch_jump) { + if (kind == arm_patch_call) { + if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) { + code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]); + assert((thumb.i & THUMB2_BLI) == THUMB2_BLI); + /* skip code to switch from arm to thumb mode */ + if (jit_exchange_p()) + d = ((label + 8 - instr) >> 1) - 2; + else + d = ((label - instr) >> 1) - 2; + assert(_s24P(d)); + thumb.i = THUMB2_BLI | encode_thumb_jump(d); + thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]); + } + else { + thumb.i = u.i[0]; + assert((thumb.i & 0x0f000000) == ARM_BLI); + d = ((label - instr) >> 2) - 2; + assert(_s24P(d)); + u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff); + } + } + else if (kind == arm_patch_jump) { if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) { code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]); if ((thumb.i & THUMB2_B) == THUMB2_B) { diff --git a/deps/lightning/lib/jit_arm-swf.c b/deps/lightning/lib/jit_arm-swf.c index c88f9e3c..2aa6a12e 100644 --- a/deps/lightning/lib/jit_arm-swf.c +++ b/deps/lightning/lib/jit_arm-swf.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -564,6 +564,8 @@ _swf_ff(jit_state_t *_jit, float(*i0)(float), jit_int32_t r0, jit_int32_t r1) { jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); else @@ -581,6 +583,8 @@ _swf_dd(jit_state_t *_jit, double (*i0)(double), jit_int32_t r0, jit_int32_t r1) { jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -614,6 +618,8 @@ _swf_fff(jit_state_t *_jit, float (*i0)(float, float), jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); else @@ -635,6 +641,8 @@ _swf_ddd(jit_state_t *_jit, double (*i0)(double, double), jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -684,6 +692,8 @@ _swf_fff_(jit_state_t *_jit, float (*i0)(float, float), jit_float32_t f; } data; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.f = i1; if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -706,6 +716,8 @@ _swf_rsbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0) jit_float32_t f; } data; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.f = i0; movi(_R0_REGNO, data.i); if (jit_fpr_p(r1)) @@ -729,7 +741,8 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double), jit_float64_t d; } data; jit_get_reg_args(); - + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.d = i1; if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -769,6 +782,8 @@ _swf_rsbi_d(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0) jit_float64_t d; } data; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.d = i0; movi(_R0_REGNO, data.i[0]); movi(_R1_REGNO, data.i[1]); @@ -805,6 +820,8 @@ _swf_iff(jit_state_t *_jit, int (*i0)(float, float), jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_get_reg_args(); + if (jit_fpr_p(r1) || jit_fpr_p(r2)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); else @@ -823,6 +840,8 @@ _swf_idd(jit_state_t *_jit, int (*i0)(double, double), jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_get_reg_args(); + if (jit_fpr_p(r1) || jit_fpr_p(r2)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -861,6 +880,8 @@ _swf_iff_(jit_state_t *_jit, int (*i0)(float, float), jit_float32_t f; } data; jit_get_reg_args(); + if (jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.f = i1; if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -881,6 +902,8 @@ _swf_idd_(jit_state_t *_jit, int (*i0)(double, double), jit_float64_t d; } data; jit_get_reg_args(); + if (jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.d = i1; if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -907,6 +930,8 @@ _swf_iunff(jit_state_t *_jit, int (*i0)(float, float), { jit_word_t instr; jit_get_reg_args(); + if (jit_fpr_p(r1) || jit_fpr_p(r2)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); else @@ -952,6 +977,8 @@ _swf_iundd(jit_state_t *_jit, int (*i0)(double, double), { jit_word_t instr; jit_get_reg_args(); + if (jit_fpr_p(r1) || jit_fpr_p(r2)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -1033,6 +1060,8 @@ _swf_iunff_(jit_state_t *_jit, int (*i0)(float, float), jit_float32_t f; } data; jit_get_reg_args(); + if (jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); 
data.f = i1; if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -1077,6 +1106,8 @@ _swf_iundd_(jit_state_t *_jit, int (*i0)(double, double), jit_float64_t d; } data; jit_get_reg_args(); + if (jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); data.d = i1; if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -1135,6 +1166,8 @@ _swf_bff(jit_state_t *_jit, int (*i0)(float, float), int cc, { jit_word_t w, d; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); else @@ -1168,6 +1201,8 @@ _swf_bdd(jit_state_t *_jit, int (*i0)(double, double), int cc, { jit_word_t w, d; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); @@ -1221,6 +1256,8 @@ _swf_bff_(jit_state_t *_jit, int (*i0)(float, float), int cc, } data; jit_word_t w, d; jit_get_reg_args(); + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); data.f = i2; if (jit_fpr_p(r0)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); @@ -1256,6 +1293,8 @@ _swf_bdd_(jit_state_t *_jit, int (*i0)(double, double), int cc, jit_float64_t d; } data; jit_get_reg_args(); + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); data.d = i2; if (jit_fpr_p(r0)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -1296,6 +1335,8 @@ _swf_bunff(jit_state_t *_jit, int eq, { jit_word_t w, d, j0, j1; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); else @@ -1366,6 +1407,8 @@ _swf_bundd(jit_state_t *_jit, int eq, { jit_word_t w, d, j0, j1; jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); @@ -1473,6 +1516,8 @@ _swf_bunff_(jit_state_t *_jit, int eq, jit_word_t w, d, j0, j1; data.f = i1; 
jit_get_reg_args(); + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8); else @@ -1541,6 +1586,8 @@ _swf_bundd_(jit_state_t *_jit, int eq, jit_float64_t d; } data; jit_get_reg_args(); + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); data.d = i1; if (jit_fpr_p(r0)) { if (!jit_thumb_p() && jit_armv5e_p()) @@ -1622,6 +1669,8 @@ static void _swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_get_reg_args(); + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); movr(_R0_REGNO, r1); swf_call(__aeabi_i2f, i2f, _R1_REGNO); if (jit_fpr_p(r0)) @@ -1635,6 +1684,8 @@ static void _swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_get_reg_args(); + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); movr(_R0_REGNO, r1); swf_call(__aeabi_i2d, i2d, _R2_REGNO); if (jit_fpr_p(r0)) { @@ -1656,6 +1707,8 @@ static void _swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -1680,6 +1733,8 @@ static void _swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_get_reg_args(); + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); else @@ -1709,6 +1764,8 @@ _swf_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_word_t slow_not_nan; #endif jit_get_reg_args(); + if (jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); else @@ -1763,6 +1820,8 @@ _swf_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_word_t slow_not_nan; #endif jit_get_reg_args(); + if (jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p()) LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8); @@ -1823,6 +1882,8 @@ _swf_movr_f(jit_state_t *_jit, jit_int32_t 
r0, jit_int32_t r1) { jit_int32_t reg; if (r0 != r1) { + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8); @@ -1844,6 +1905,8 @@ _swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (r0 != r1) { + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (!jit_thumb_p() && jit_armv5e_p() && (reg = jit_get_reg_pair()) != JIT_NOREG) { @@ -1894,6 +1957,8 @@ _swf_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) jit_float32_t f; } data; jit_int32_t reg; + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); data.f = i0; if (jit_fpr_p(r0)) { reg = jit_get_reg(jit_class_gpr); @@ -1913,6 +1978,8 @@ _swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0) jit_int32_t i[2]; jit_float64_t d; } data; + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); data.d = i0; if (jit_fpr_p(r0)) { if (!jit_thumb_p() && jit_armv5e_p() && @@ -1941,6 +2008,8 @@ static void _swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8); @@ -1966,6 +2035,8 @@ static void _swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() && r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) { @@ -2013,6 +2084,8 @@ static void _swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; + if (jit_fpr_p(r0) || jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8); @@ -2038,6 +2111,8 @@ static void _swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; + if (jit_fpr_p(r0) || 
jit_fpr_p(r1)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r1)) { if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() && r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) { @@ -2170,6 +2245,7 @@ _swf_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); ldxi_i(rn(reg), r1, 0); swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8); @@ -2184,6 +2260,7 @@ _swf_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && (reg = jit_get_reg_pair()) != JIT_NOREG) { LDRDI(rn(reg), r1, 0); @@ -2212,6 +2289,7 @@ _swf_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); ldi_i(rn(reg), i0); swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8); @@ -2225,6 +2303,8 @@ static void _swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t rg0, rg1; + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() && (rg0 = jit_get_reg_pair()) != JIT_NOREG) { movi(rn(rg0), i0); @@ -2258,6 +2338,7 @@ _swf_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t reg; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); ldxr_i(rn(reg), r1, r2); swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8); @@ -2272,6 +2353,7 @@ _swf_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t rg0, rg1; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && (rg0 = jit_get_reg_pair()) != JIT_NOREG) { LDRD(rn(rg0), r1, r2); @@ -2307,6 +2389,8 @@ static void _swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; + if (jit_fpr_p(r0)) + CHECK_SWF_OFFSET(); if (jit_fpr_p(r0)) { reg = jit_get_reg(jit_class_gpr); ldxi_i(rn(reg), r1, i0); 
@@ -2322,6 +2406,7 @@ _swf_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t rg0, rg1; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) && (rg0 = jit_get_reg_pair()) != JIT_NOREG) { @@ -2391,6 +2476,7 @@ _swf_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_fpr_p(r1)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8); stxi_i(0, r0, rn(reg)); @@ -2405,6 +2491,7 @@ _swf_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; if (jit_fpr_p(r1)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && (reg = jit_get_reg_pair()) != JIT_NOREG) { LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8); @@ -2435,6 +2522,7 @@ _swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r0) + 8); sti_i(i0, rn(reg)); @@ -2449,6 +2537,7 @@ _swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t rg0, rg1; if (jit_fpr_p(r0)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && (rg0 = jit_get_reg_pair()) != JIT_NOREG) { rg1 = jit_get_reg(jit_class_gpr); @@ -2488,6 +2577,7 @@ _swf_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t reg; if (jit_fpr_p(r2)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r2) + 8); stxr_i(r1, r0, rn(reg)); @@ -2502,6 +2592,7 @@ _swf_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_int32_t rg0, rg1; if (jit_fpr_p(r2)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && (rg0 = jit_get_reg_pair()) != JIT_NOREG) { LDRDIN(rn(rg0), _FP_REGNO, swf_off(r2) + 8); @@ -2538,6 +2629,7 @@ _swf_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, 
jit_int32_t r1) { jit_int32_t reg; if (jit_fpr_p(r1)) { + CHECK_SWF_OFFSET(); reg = jit_get_reg(jit_class_gpr); swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8); stxi_i(i0, r0, rn(reg)); @@ -2552,6 +2644,7 @@ _swf_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t rg0, rg1; if (jit_fpr_p(r1)) { + CHECK_SWF_OFFSET(); if (!jit_thumb_p() && jit_armv5e_p() && ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) && (rg0 = jit_get_reg_pair()) != JIT_NOREG) { diff --git a/deps/lightning/lib/jit_arm-sz.c b/deps/lightning/lib/jit_arm-sz.c index 14f085ae..faba5a81 100644 --- a/deps/lightning/lib/jit_arm-sz.c +++ b/deps/lightning/lib/jit_arm-sz.c @@ -1,12 +1,13 @@ #if __WORDSIZE == 32 #if defined(__ARM_PCS_VFP) -#define JIT_INSTR_MAX 48 +#define JIT_INSTR_MAX 50 0, /* data */ 0, /* live */ - 2, /* align */ + 14, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 2, /* #name */ 0, /* #note */ 0, /* label */ @@ -15,7 +16,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -23,8 +27,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 16, /* va_arg_d */ @@ -98,12 +114,17 @@ 8, /* movi */ 8, /* movnr */ 8, /* movzr */ + 42, /* casr */ + 50, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 8, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ 8, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -196,13 +217,37 @@ 4, /* callr */ 20, /* calli */ 0, /* 
prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -351,7 +396,7 @@ 8, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 16, /* movi_d */ + 32, /* movi_d */ 4, /* ldr_d */ 12, /* ldi_d */ 8, /* ldxr_d */ @@ -365,7 +410,7 @@ 12, /* bler_d */ 28, /* blei_d */ 12, /* beqr_d */ - 28, /* beqi_d */ + 36, /* beqi_d */ 12, /* bger_d */ 28, /* bgei_d */ 12, /* bgtr_d */ @@ -402,22 +447,22 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 8, /* bswapr_us */ - 4, /* bswapr_ui */ - 0, /* bswapr_ul */ - 40, /* casr */ - 48, /* casi */ + 8, /* clo */ + 4, /* clz */ + 12, /* cto */ + 8, /* ctz */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ #if __WORDSIZE == 32 #if !defined(__ARM_PCS_VFP) -#define JIT_INSTR_MAX 160 +#define JIT_INSTR_MAX 50 0, /* data */ 0, /* live */ - 2, /* align */ + 18, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 2, /* #name */ 0, /* #note */ 0, /* label */ @@ -426,7 +471,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -434,8 +482,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc 
*/ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 28, /* va_arg_d */ @@ -509,12 +569,17 @@ 8, /* movi */ 8, /* movnr */ 8, /* movzr */ + 42, /* casr */ + 46, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 20, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 20, /* htonr_us */ 16, /* htonr_ui */ 0, /* htonr_ul */ @@ -607,13 +672,37 @@ 4, /* callr */ 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -621,7 +710,7 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 160, /* epilog */ + 30, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ @@ -671,7 +760,7 @@ 28, /* extr_f */ 22, /* extr_d_f */ 8, /* movr_f */ - 12, /* movi_f */ + 16, /* movi_f */ 8, /* ldr_f */ 16, /* ldi_f */ 8, /* ldxr_f */ @@ -685,7 +774,7 @@ 28, /* bler_f */ 32, /* blei_f */ 28, /* beqr_f */ - 40, /* beqi_f */ + 48, /* beqi_f */ 28, /* bger_f */ 32, /* bgei_f */ 28, /* bgtr_f */ @@ -759,10 +848,10 @@ 72, /* unordi_d */ 20, /* truncr_d_i */ 0, /* truncr_d_l */ - 28, 
/* extr_d */ + 36, /* extr_d */ 22, /* extr_f_d */ 16, /* movr_d */ - 20, /* movi_d */ + 32, /* movi_d */ 16, /* ldr_d */ 24, /* ldi_d */ 20, /* ldxr_d */ @@ -813,10 +902,9 @@ 12, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ - 16, /* bswapr_ui */ - 0, /* bswapr_ul */ - 40, /* casr */ - 44, /* casi */ + 8, /* clo */ + 4, /* clz */ + 12, /* cto */ + 8, /* ctz */ #endif /* __ARM_PCS_VFP */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_arm-vfp.c b/deps/lightning/lib/jit_arm-vfp.c index 4b146d25..20f80a21 100644 --- a/deps/lightning/lib/jit_arm-vfp.c +++ b/deps/lightning/lib/jit_arm-vfp.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1255,7 +1255,7 @@ _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0) if (jit_fpr_p(r0)) { /* float arguments are packed, for others, * lightning only address even registers */ - if (!(r0 & 1) && (r0 - 16) >= 0 && + if (!(r0 & 1) && (r0 - 32) >= 0 && ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 || (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1)) VIMM(code, r0); diff --git a/deps/lightning/lib/jit_arm.c b/deps/lightning/lib/jit_arm.c index 6b121bf3..64a70f96 100644 --- a/deps/lightning/lib/jit_arm.c +++ b/deps/lightning/lib/jit_arm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -21,6 +21,8 @@ # include #endif +#define stack_framesize 48 + #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) #define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 16) #define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 15) @@ -28,12 +30,12 @@ #define arm_patch_node 0x80000000 #define arm_patch_word 0x40000000 #define arm_patch_jump 0x20000000 -#define arm_patch_load 0x00000000 +#define arm_patch_load 0x10000000 +#define arm_patch_call 0x08000000 #define jit_fpr_p(rn) ((rn) > 15) -#define arg_base() \ - (stack_framesize - 16 + (jit_cpu.abi ? 64 : 0)) +#define arg_base() (stack_framesize - 16) #define arg_offset(n) \ ((n) < 4 ? arg_base() + ((n) << 2) : (n)) @@ -42,10 +44,32 @@ * arm mode, what may cause a crash upon return of that function * if generating jit for a relative jump. */ -#define jit_exchange_p() 1 +#define jit_exchange_p() jit_cpu.exchange /* FIXME is it really required to not touch _R10? */ +#define CHECK_REG_ARGS() \ + do { \ + if (!_jitc->function->save_reg_args) \ + _jitc->again = _jitc->function->save_reg_args = 1; \ + } while (0) + +#define CHECK_SWF_OFFSET() \ + do { \ + if (!_jitc->function->swf_offset) { \ + _jitc->again = _jitc->function->save_reg_args = \ + _jitc->function->swf_offset = 1; \ + _jitc->function->self.aoff = -64; \ + } \ + } while (0) + +#define CHECK_RETURN() \ + do { \ + if (!_jitc->function->need_frame && \ + !_jitc->function->need_return) \ + _jitc->again = _jitc->function->need_return = 1; \ + } while (0) + /* * Types */ @@ -59,8 +83,8 @@ typedef jit_pointer_t jit_va_list; /* * Prototypes */ -#define jit_make_arg(node) _jit_make_arg(_jit,node) -static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*); +#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code) +static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t); #define jit_make_arg_f(node) _jit_make_arg_f(_jit,node) static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*); #define jit_make_arg_d(node) _jit_make_arg_d(_jit,node) @@ -77,8 +101,10 @@ static 
void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t); static void _flush_consts(jit_state_t*); #define invalidate_consts() _invalidate_consts(_jit) static void _invalidate_consts(jit_state_t*); -#define patch(instr, node) _patch(_jit, instr, node) -static void _patch(jit_state_t*,jit_word_t,jit_node_t*); +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); +#define patch(instr, node, kind) _patch(_jit, instr, node, kind) +static void _patch(jit_state_t*,jit_word_t,jit_node_t*,jit_int32_t); #if defined(__GNUC__) /* libgcc */ @@ -149,6 +175,10 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { + _R4, _R5, _R6, _R7, _R8, _R9, +}; + /* * Implementation */ @@ -202,6 +232,14 @@ jit_get_cpu(void) /* armv6t2 todo (software float and thumb2) */ if (!jit_cpu.vfp && jit_cpu.thumb) jit_cpu.thumb = 0; + /* FIXME need test environments for the below. For the moment just + * be very conservative */ + /* force generation of code assuming jit and function libraries called + * instruction set do not match */ + jit_cpu.exchange = 1; + /* do not generate hardware integer division by default */ + if (jit_cpu.version == 7) + jit_cpu.extend = 0; } void @@ -245,15 +283,10 @@ _jit_prolog(jit_state_t *_jit) } _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; _jitc->function->self.size = stack_framesize; - if (jit_cpu.abi) - _jitc->function->self.size += 64; _jitc->function->self.argi = _jitc->function->self.argf = - _jitc->function->self.alen = 0; - if (jit_swf_p()) - /* 8 soft float registers */ - _jitc->function->self.aoff = -64; - else - _jitc->function->self.aoff = 0; + _jitc->function->self.alen = _jitc->function->self.aoff = 0; + _jitc->function->swf_offset = _jitc->function->save_reg_args = + _jitc->function->need_return = 0; _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -279,6 +312,9 
@@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + if (jit_swf_p()) + CHECK_SWF_OFFSET(); + jit_check_frame(); switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -327,20 +363,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -422,7 +456,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code != jit_code_arg) { + if (!(u->code >= jit_code_arg_c && u->code <= jit_code_arg)) { if (u->code == jit_code_arg_f) { if (jit_cpu.abi) return (jit_arg_f_reg_p(u->u.w)); @@ -437,7 +471,7 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) } static jit_node_t * -_jit_make_arg(jit_state_t *_jit, jit_node_t *node) +_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code) { jit_int32_t offset; if (jit_arg_reg_p(_jitc->function->self.argi)) @@ -447,7 +481,7 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node) _jitc->function->self.size += sizeof(jit_word_t); } if (node == (jit_node_t *)0) - node = jit_new_node(jit_code_arg); + node = jit_new_node(code); else link_node(node); node->u.w = offset; @@ -534,9 +568,10 @@ _jit_ellipsis(jit_state_t *_jit) else { assert(!(_jitc->function->self.call & jit_call_varargs)); _jitc->function->self.call |= jit_call_varargs; + CHECK_REG_ARGS(); if (jit_cpu.abi && _jitc->function->self.argf) rewind_prolog(); - /* First 4 stack addresses are always spilled r0-r3 */ + /* First 4 stack addresses 
need to be spilled r0-r3 */ if (jit_arg_reg_p(_jitc->function->self.argi)) _jitc->function->vagp = _jitc->function->self.argi * 4; else @@ -559,16 +594,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { assert(_jitc->function); - return (jit_make_arg((jit_node_t*)0)); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif + return (jit_make_arg((jit_node_t*)0, code)); } jit_node_t * _jit_arg_f(jit_state_t *_jit) { assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); return (jit_make_arg_f((jit_node_t*)0)); } @@ -576,103 +616,141 @@ jit_node_t * _jit_arg_d(jit_state_t *_jit) { assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); return (jit_make_arg_d((jit_node_t*)0)); } void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + jit_node_t *node = NULL; + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_swf_p()) - jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w)); else if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - v->u.w); else - jit_ldxi_c(u, JIT_FP, v->u.w); + node = jit_ldxi_c(u, JIT_FP, v->u.w); + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + jit_node_t *node = NULL; + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_swf_p()) - jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w)); else if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); else - jit_ldxi_uc(u, JIT_FP, v->u.w); + node = jit_ldxi_uc(u, JIT_FP, v->u.w); + if (node) { + 
CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + jit_node_t *node = NULL; + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_swf_p()) - jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w)); else if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); else - jit_ldxi_s(u, JIT_FP, v->u.w); + node = jit_ldxi_s(u, JIT_FP, v->u.w); + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + jit_node_t *node = NULL; + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_swf_p()) - jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w)); else if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); else - jit_ldxi_us(u, JIT_FP, v->u.w); + node = jit_ldxi_us(u, JIT_FP, v->u.w); + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + jit_node_t *node = NULL; + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_swf_p()) - jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w)); else if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); else - jit_ldxi_i(u, JIT_FP, v->u.w); + node = jit_ldxi_i(u, JIT_FP, v->u.w); + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - 
jit_inc_synth_wp(putargr, u, v); + jit_node_t *node = NULL; + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_swf_p()) - jit_stxi(arg_offset(v->u.w), JIT_FP, u); + node = jit_stxi(arg_offset(v->u.w), JIT_FP, u); else if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); else - jit_stxi(v->u.w, JIT_FP, u); + node = jit_stxi(v->u.w, JIT_FP, u); + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { - jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + jit_int32_t regno; + jit_node_t *node = NULL; + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_swf_p()) { regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(arg_offset(v->u.w), JIT_FP, regno); + node = jit_stxi(arg_offset(v->u.w), JIT_FP, regno); jit_unget_reg(regno); } else if (jit_arg_reg_p(v->u.w)) @@ -680,30 +758,41 @@ _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) else { regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w, JIT_FP, regno); + node = jit_stxi(v->u.w, JIT_FP, regno); jit_unget_reg(regno); } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { + jit_node_t *node = NULL; assert(v->code == jit_code_arg_f); jit_inc_synth_wp(getarg_f, u, v); if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) { if (jit_arg_f_reg_p(v->u.w)) jit_movr_f(u, JIT_FA0 - v->u.w); else - jit_ldxi_f(u, JIT_FP, v->u.w); + node = jit_ldxi_f(u, JIT_FP, v->u.w); } else if (jit_swf_p()) - jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w)); else { if (jit_arg_reg_p(v->u.w)) jit_movr_w_f(u, JIT_RA0 - v->u.w); else - 
jit_ldxi_f(u, JIT_FP, v->u.w); + node = jit_ldxi_f(u, JIT_FP, v->u.w); + } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); } jit_dec_synth(); } @@ -711,21 +800,27 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { + jit_node_t *node = NULL; assert(v->code == jit_code_arg_f); jit_inc_synth_wp(putargr_f, u, v); if (jit_cpu.abi) { if (jit_arg_f_reg_p(v->u.w)) jit_movr_f(JIT_FA0 - v->u.w, u); else - jit_stxi_f(v->u.w, JIT_FP, u); + node = jit_stxi_f(v->u.w, JIT_FP, u); } else if (jit_swf_p()) - jit_stxi_f(arg_offset(v->u.w), JIT_FP, u); + node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, u); else { if (jit_arg_reg_p(v->u.w)) jit_movr_f_w(JIT_RA0 - v->u.w, u); else - jit_stxi_f(v->u.w, JIT_FP, u); + node = jit_stxi_f(v->u.w, JIT_FP, u); + } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); } jit_dec_synth(); } @@ -733,7 +828,8 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) { - jit_int32_t regno; + jit_int32_t regno; + jit_node_t *node = NULL; assert(v->code == jit_code_arg_f); jit_inc_synth_fp(putargi_f, u, v); if (jit_cpu.abi) { @@ -742,14 +838,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) else { regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, JIT_FP, regno); + node = jit_stxi_f(v->u.w, JIT_FP, regno); jit_unget_reg(regno); } } else if (jit_swf_p()) { regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno); + node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno); jit_unget_reg(regno); } else { @@ -758,30 +854,41 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movr_f_w(JIT_RA0 - v->u.w, regno); else - jit_stxi_f(v->u.w, JIT_FP, regno); + node = jit_stxi_f(v->u.w, JIT_FP, regno); 
jit_unget_reg(regno); } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { + jit_node_t *node = NULL; assert(v->code == jit_code_arg_d); jit_inc_synth_wp(getarg_d, u, v); if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) { if (jit_arg_f_reg_p(v->u.w)) jit_movr_d(u, JIT_FA0 - v->u.w); else - jit_ldxi_d(u, JIT_FP, v->u.w); + node = jit_ldxi_d(u, JIT_FP, v->u.w); } else if (jit_swf_p()) - jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w)); + node = jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w)); else { if (jit_arg_reg_p(v->u.w)) jit_movr_ww_d(u, JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1)); else - jit_ldxi_d(u, JIT_FP, v->u.w); + node = jit_ldxi_d(u, JIT_FP, v->u.w); + } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); } jit_dec_synth(); } @@ -789,21 +896,27 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { + jit_node_t *node = NULL; assert(v->code == jit_code_arg_d); jit_inc_synth_wp(putargr_d, u, v); if (jit_cpu.abi) { if (jit_arg_f_reg_p(v->u.w)) jit_movr_d(JIT_FA0 - v->u.w, u); else - jit_stxi_d(v->u.w, JIT_FP, u); + node = jit_stxi_d(v->u.w, JIT_FP, u); } else if (jit_swf_p()) - jit_stxi_d(arg_offset(v->u.w), JIT_FP, u); + node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, u); else { if (jit_arg_reg_p(v->u.w)) jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), u); else - jit_stxi_d(v->u.w, JIT_FP, u); + node = jit_stxi_d(v->u.w, JIT_FP, u); + } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); } jit_dec_synth(); } @@ -811,7 +924,8 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) { - jit_int32_t regno; + jit_int32_t regno; + jit_node_t *node = NULL; assert(v->code == jit_code_arg_d); jit_inc_synth_dp(putargi_d, 
u, v); if (jit_cpu.abi) { @@ -820,14 +934,14 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) else { regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, JIT_FP, regno); + node = jit_stxi_d(v->u.w, JIT_FP, regno); jit_unget_reg(regno); } } else if (jit_swf_p()) { regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno); + node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno); jit_unget_reg(regno); } else { @@ -836,17 +950,22 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), regno); else - jit_stxi_d(v->u.w, JIT_FP, regno); + node = jit_stxi_d(v->u.w, JIT_FP, regno); jit_unget_reg(regno); } + if (node) { + CHECK_REG_ARGS(); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -860,11 +979,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -1148,6 +1267,7 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1293,7 +1413,7 @@ _emit_code(jit_state_t *_jit) else { \ word = name##r##type(_jit->pc.w, \ rn(node->v.w), rn(node->w.w)); \ - patch(word, node); \ + patch(word, node, 
arm_patch_jump); \ } \ break #define case_bvv(name, type) \ @@ -1318,7 +1438,7 @@ _emit_code(jit_state_t *_jit) word = vfp_##name##r##type(_jit->pc.w, \ rn(node->v.w), \ rn(node->w.w)); \ - patch(word, node); \ + patch(word, node, arm_patch_jump); \ } \ break #define case_brw(name, type) \ @@ -1332,7 +1452,7 @@ _emit_code(jit_state_t *_jit) else { \ word = name##i##type(_jit->pc.w, \ rn(node->v.w), node->w.w); \ - patch(word, node); \ + patch(word, node, arm_patch_jump); \ } \ break; #define case_bvf(name) \ @@ -1357,7 +1477,7 @@ _emit_code(jit_state_t *_jit) word = vfp_##name##i_f(_jit->pc.w, \ rn(node->v.w), \ node->w.f); \ - patch(word, node); \ + patch(word, node, arm_patch_jump); \ } \ break #define case_bvd(name) \ @@ -1382,7 +1502,7 @@ _emit_code(jit_state_t *_jit) word = vfp_##name##i_d(_jit->pc.w, \ rn(node->v.w), \ node->w.d); \ - patch(word, node); \ + patch(word, node, arm_patch_jump); \ } \ break #if DEVEL_DISASSEMBLER @@ -1405,6 +1525,12 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + if (jit_thumb_p()) + nop((node->u.w + 1) & ~1); + else + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: if (must_align_p(node->next)) nop(2); @@ -1456,6 +1582,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1526,7 +1656,7 @@ _emit_code(jit_state_t *_jit) assert(temp->code == jit_code_label || temp->code == jit_code_epilog); word = movi_p(rn(node->u.w), temp->u.w); - patch(word, node); + patch(word, node, arm_patch_word); } } else @@ -1765,6 +1895,7 @@ _emit_code(jit_state_t *_jit) case_bvv(bunord, _d); case_bvd(bunord); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); flush_consts(); break; @@ -1776,36 +1907,59 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); 
else { - word = jmpi_p(_jit->pc.w, 1); - patch(word, node); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (jit_thumb_p()) word >>= 1; + else word >>= 2; + word -= 2; + value = _s24P(word); + word = jmpi_p(_jit->pc.w, value); + patch(word, node, value ? + arm_patch_jump : arm_patch_word); } } - else + else { + jit_check_frame(); jmpi(node->u.w); + } flush_consts(); break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: if (node->flag & jit_flag_node) { + CHECK_RETURN(); temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); if (temp->flag & jit_flag_patch) - calli(temp->u.w); + calli(temp->u.w, 0); else { - word = calli_p(_jit->pc.w); - patch(word, node); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (jit_exchange_p()) + word -= 8; + if (jit_thumb_p()) word >>= 1; + else word >>= 2; + word -= 2; + value = _s24P(word); + word = calli_p(_jit->pc.w, value); + patch(word, node, value ? + arm_patch_call : arm_patch_word); } } - else - calli(node->u.w); + else { + jit_check_frame(); + calli(node->u.w, jit_exchange_p()); + } break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1819,6 +1973,8 @@ _emit_code(jit_state_t *_jit) #endif restart_function: _jitc->again = 0; + compute_framesize(); + patch_alist(0); prolog(node); break; case jit_code_epilog: @@ -1833,6 +1989,21 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + undo.func.need_return = _jitc->function->need_return; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + /* swf_offset and check_reg_args must also not be undone */ + undo.func.swf_offset = _jitc->function->swf_offset; + undo.func.save_reg_args = _jitc->function->save_reg_args; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1845,6 +2016,7 @@ _emit_code(jit_state_t *_jit) if (_jitc->data_info.ptr) _jitc->data_info.offset = undo.info_offset; #endif + patch_alist(1); goto restart_function; } /* remember label is defined */ @@ -1907,21 +2079,34 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: case jit_code_getarg_s: case jit_code_getarg_us: case jit_code_getarg_i: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case 
jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1984,7 +2169,10 @@ _emit_code(jit_state_t *_jit) node = _jitc->patches.ptr[offset].node; word = _jitc->patches.ptr[offset].inst; if (!jit_thumb_p() && - (node->code == jit_code_movi || node->code == jit_code_calli)) { + (node->code == jit_code_movi || + (node->code == jit_code_calli && + (_jitc->patches.ptr[offset].kind & ~arm_patch_node) == + arm_patch_word))) { /* calculate where to patch word */ value = *(jit_int32_t *)word; assert((value & 0x0f700000) == ARM_LDRI); @@ -2254,24 +2442,31 @@ _invalidate_consts(jit_state_t *_jit) } static void -_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) +_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + _jitc->framesize = sizeof(jit_word_t) * 2; /* lr+fp */ + for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += sizeof(jit_word_t); + + if (_jitc->function->save_reg_args) + _jitc->framesize += 16; + + /* Make sure functions called have a 8 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 7) & -8; +} + +static void +_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node, jit_int32_t kind) { jit_int32_t flag; - jit_int32_t kind; assert(node->flag & jit_flag_node); - if (node->code == jit_code_movi) { + if (node->code == jit_code_movi) flag = 
node->v.n->flag; - kind = arm_patch_word; - } - else { + else flag = node->u.n->flag; - if (node->code == jit_code_calli || - (node->code == jit_code_jmpi && !(node->flag & jit_flag_node))) - kind = arm_patch_word; - else - kind = arm_patch_jump; - } assert(!(flag & jit_flag_patch)); kind |= arm_patch_node; if (_jitc->patches.offset >= _jitc->patches.length) { diff --git a/deps/lightning/lib/jit_disasm.c b/deps/lightning/lib/jit_disasm.c index 9ad84f1b..a6981fa5 100644 --- a/deps/lightning/lib/jit_disasm.c +++ b/deps/lightning/lib/jit_disasm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -54,7 +54,7 @@ static FILE *disasm_stream; #endif #if BINUTILS_2_38 -static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...) +static int fprintf_styled(void * stream, enum disassembler_style style, const char* fmt, ...) { va_list args; int r; @@ -256,7 +256,7 @@ disasm_print_address(bfd_vma addr, struct disassemble_info *info) int line; char buffer[address_buffer_length]; - sprintf(buffer, address_buffer_format, (long long)addr); + sprintf(buffer, address_buffer_format, addr); (*info->fprintf_func)(info->stream, "0x%s", buffer); # define _jit disasm_jit @@ -406,7 +406,7 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length) old_line = line; } - bytes = sprintf(buffer, address_buffer_format, (long long)pc); + bytes = sprintf(buffer, address_buffer_format, pc); (*disasm_info.fprintf_func)(disasm_stream, "%*c0x%s\t", 16 - bytes, ' ', buffer); pc += (*disasm_print)(pc, &disasm_info); diff --git a/deps/lightning/lib/jit_fallback.c b/deps/lightning/lib/jit_fallback.c index 8912691d..2f7f214e 100644 --- a/deps/lightning/lib/jit_fallback.c +++ b/deps/lightning/lib/jit_fallback.c @@ -12,6 +12,55 @@ static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t); #define fallback_casx(r0,r1,r2,r3,im) 
_fallback_casx(_jit,r0,r1,r2,r3,im) static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t, jit_word_t); +#define fallback_clo(r0,r1) _fallback_clo(_jit,r0,r1) +static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t); +#define fallback_clz(r0,r1) _fallback_clz(_jit,r0,r1) +static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t); +#define fallback_cto(r0,r1) _fallback_cto(_jit,r0,r1) +static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t); +#define fallback_ctz(r0,r1) _fallback_ctz(_jit,r0,r1) +static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t); +# if defined(__ia64__) +# define fallback_patch_jmpi(inst,lbl) \ + do { \ + sync(); \ + patch_at(jit_code_jmpi, inst, lbl); \ + } while (0) +# else +# define fallback_patch_jmpi(inst,lbl) fallback_patch_at(inst,lbl) +# endif +# if defined(__arm__) +# define fallback_patch_at(inst,lbl) patch_at(arm_patch_jump,inst,lbl) +# elif defined(__ia64__) +# define fallback_patch_at(inst,lbl) \ + do { \ + sync(); \ + patch_at(jit_code_bnei, inst, lbl); \ + } while (0); +# else +# define fallback_patch_at(inst,lbl) patch_at(inst,lbl) +# endif +# if defined(__mips__) +# define fallback_jmpi(i0) jmpi(i0,1) +# elif defined(__arm__) +# define fallback_jmpi(i0) jmpi_p(i0,1) +# elif defined(__s390__) || defined(__s390x__) +# define fallback_jmpi(i0) jmpi(i0,1) +# else +# define fallback_jmpi(i0) jmpi(i0) +# endif +# if defined(__mips__) +# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1) +# elif defined(__s390__) || defined(__s390x__) +# define fallback_bnei(i0,r0,i1) bnei_p(i0,r0,i1) +# else +# define fallback_bnei(i0,r0,i1) bnei(i0,r0,i1) +# endif +# if defined(__s390__) || defined(__s390x__) +# define fallback_bmsr(i0,r0,r1) bmsr_p(i0,r0,r1) +# else +# define fallback_bmsr(i0,r0,r1) bmsr(i0,r0,r1) +# endif #endif #if CODE @@ -96,16 +145,20 @@ _fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1) { # if defined(__arm__) movi(rn(_R0), i1); -# elif 
defined(__ia64__) - /* avoid confusion with pushargi patching */ - if (i1 >= -2097152 && i1 <= 2097151) - MOVI(_jitc->rout, i1); - else - MOVL(_jitc->rout, i1); # elif defined(__hppa__) movi(_R26_REGNO, i1); -#endif +# endif +# if defined(__arm__) + calli(i0, jit_exchange_p()); +# elif defined(__mips__) + calli(i0, 0); +# elif defined(__powerpc__) && _CALL_SYSV + calli(i0, 0); +# elif defined(__s390__) || defined(__s390x__) + calli(i0, 0); +# else calli(i0); +# endif } static void @@ -128,7 +181,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, fallback_load(r2); eqr(r0, r0, r2); fallback_save(r0); - jump = bnei(_jit->pc.w, r0, 1); + jump = fallback_bnei(_jit->pc.w, r0, 1); fallback_load(r3); # if __WORDSIZE == 32 str_i(r1, r3); @@ -136,21 +189,144 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, str_l(r1, r3); # endif /* done: */ -# if defined(__ia64__) - sync(); -# endif done = _jit->pc.w; fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex); fallback_load(r0); -# if defined(__arm__) - patch_at(arm_patch_jump, jump, done); -# elif defined(__ia64__) - patch_at(jit_code_bnei, jump, done); -# else - patch_at(jump, done); -# endif + fallback_patch_at(jump, done); fallback_load_regs(r0); if (iscasi) jit_unget_reg(r1_reg); } + +static void +_fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t clz, done; + comr(r0, r1); + clz = fallback_bnei(_jit->pc.w, r0, 0); + movi(r0, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + fallback_patch_at(clz, _jit->pc.w); + fallback_clz(r0, r0); + fallback_patch_jmpi(done, _jit->pc.w); +} + +static void +_fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t r1_reg, r2, r2_reg; + jit_word_t clz, l32, l16, l8, l4, l2, l1; + l32 = fallback_bnei(_jit->pc.w, r1, 0); + movi(r0, __WORDSIZE); + clz = fallback_jmpi(_jit->pc.w); + fallback_patch_at(l32, _jit->pc.w); + r2_reg = jit_get_reg(jit_class_gpr); + r2 = rn(r2_reg); + r1_reg = 
jit_get_reg(jit_class_gpr); + movr(rn(r1_reg), r1); + r1 = rn(r1_reg); + movi(r0, 0); +# if __WORDSIZE == 64 + movi(r2, 0xffffffff00000000UL); + l32 = fallback_bmsr(_jit->pc.w, r1, r2); + lshi(r1, r1, 32); + addi(r0, r0, 32); + fallback_patch_at(l32, _jit->pc.w); + lshi(r2, r2, 16); +# else + movi(r2, 0xffff0000UL); +# endif + l16 = fallback_bmsr(_jit->pc.w, r1, r2); + lshi(r1, r1, 16); + addi(r0, r0, 16); + fallback_patch_at(l16, _jit->pc.w); + lshi(r2, r2, 8); + l8 = fallback_bmsr(_jit->pc.w, r1, r2); + lshi(r1, r1, 8); + addi(r0, r0, 8); + fallback_patch_at(l8, _jit->pc.w); + lshi(r2, r2, 4); + l4 = fallback_bmsr(_jit->pc.w, r1, r2); + lshi(r1, r1, 4); + addi(r0, r0, 4); + fallback_patch_at(l4, _jit->pc.w); + lshi(r2, r2, 2); + l2 = fallback_bmsr(_jit->pc.w, r1, r2); + lshi(r1, r1, 2); + addi(r0, r0, 2); + fallback_patch_at(l2, _jit->pc.w); + lshi(r2, r2, 1); + l1 = fallback_bmsr(_jit->pc.w, r1, r2); + addi(r0, r0, 1); + fallback_patch_at(l1, _jit->pc.w); + fallback_patch_jmpi(clz, _jit->pc.w); + jit_unget_reg(r2_reg); + jit_unget_reg(r1_reg); +} + +static void +_fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t ctz, done; + comr(r0, r1); + ctz = fallback_bnei(_jit->pc.w, r0, 0); + movi(r0, __WORDSIZE); + done = fallback_jmpi(_jit->pc.w); + fallback_patch_at(ctz, _jit->pc.w); + fallback_ctz(r0, r0); + fallback_patch_jmpi(done, _jit->pc.w); +} + +static void +_fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t r1_reg, r2, r2_reg; + jit_word_t ctz, l32, l16, l8, l4, l2, l1; + l32 = fallback_bnei(_jit->pc.w, r1, 0); + movi(r0, __WORDSIZE); + ctz = fallback_jmpi(_jit->pc.w); + fallback_patch_at(l32, _jit->pc.w); + r2_reg = jit_get_reg(jit_class_gpr); + r2 = rn(r2_reg); + r1_reg = jit_get_reg(jit_class_gpr); + movr(rn(r1_reg), r1); + r1 = rn(r1_reg); + movi(r0, 0); +# if __WORDSIZE == 64 + movi(r2, 0xffffffffUL); + l32 = fallback_bmsr(_jit->pc.w, r1, r2); + rshi_u(r1, r1, 32); + addi(r0, r0, 32); + 
fallback_patch_at(l32, _jit->pc.w); + rshi(r2, r2, 16); +# else + movi(r2, 0xffffUL); +# endif + l16 = fallback_bmsr(_jit->pc.w, r1, r2); + rshi_u(r1, r1, 16); + addi(r0, r0, 16); + fallback_patch_at(l16, _jit->pc.w); + rshi(r2, r2, 8); + l8 = fallback_bmsr(_jit->pc.w, r1, r2); + rshi_u(r1, r1, 8); + addi(r0, r0, 8); + fallback_patch_at(l8, _jit->pc.w); + rshi(r2, r2, 4); + l4 = fallback_bmsr(_jit->pc.w, r1, r2); + rshi_u(r1, r1, 4); + addi(r0, r0, 4); + fallback_patch_at(l4, _jit->pc.w); + rshi(r2, r2, 2); + l2 = fallback_bmsr(_jit->pc.w, r1, r2); + rshi_u(r1, r1, 2); + addi(r0, r0, 2); + fallback_patch_at(l2, _jit->pc.w); + rshi(r2, r2, 1); + l1 = fallback_bmsr(_jit->pc.w, r1, r2); + addi(r0, r0, 1); + fallback_patch_at(l1, _jit->pc.w); + fallback_patch_jmpi(ctz, _jit->pc.w); + jit_unget_reg(r2_reg); + jit_unget_reg(r1_reg); +} #endif diff --git a/deps/lightning/lib/jit_hppa-cpu.c b/deps/lightning/lib/jit_hppa-cpu.c index 013460c1..ebb01fd2 100644 --- a/deps/lightning/lib/jit_hppa-cpu.c +++ b/deps/lightning/lib/jit_hppa-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -648,6 +648,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); #define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); +# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); # define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) @@ -663,8 +667,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, #define extr_uc(r0,r1) EXTRWR_U(r1,31,8,r0) #define extr_s(r0,r1) EXTRWR(r1,31,16,r0) #define extr_us(r0,r1) EXTRWR_U(r1,31,16,r0) -#define bswapr_us(r0,r1) generic_bswapr_us(_jit,r0,r1) -#define bswapr_ui(r0,r1) generic_bswapr_ui(_jit,r0,r1) #define addr(r0,r1,r2) ADD(r1,r2,r0) #define addi(r0,r1,i0) _addi(_jit,r0,r1,i0) static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); @@ -912,7 +914,7 @@ static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); #define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); #define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); #define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -1638,6 +1640,42 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r1); + EXTRWR_U(rn(reg), 23, 8, r0); + DEPWR(rn(reg), 23, 8, r0); + jit_unget_reg(reg); + } + else { + EXTRWR_U(r1, 23, 8, r0); + DEPWR(r1, 23, 8, r0); + } +} + +static void +_bswapr_ui(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t reg; + if (r0 == r1) { + reg = jit_get_reg(jit_class_gpr); + movr(rn(reg), r1); + SHRPWI(rn(reg), rn(reg), 16, r0); + DEPWR(r0, 15, 8, r0); + SHRPWI(rn(reg), r0, 8, r0); + jit_unget_reg(reg); + } + else { + SHRPWI(r1, r1, 16, r0); + DEPWR(r0, 15, 8, r0); + SHRPWI(r1, r0, 8, r0); + } +} + static void _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2632,17 +2670,19 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) BV_N(_R0_REGNO, r0); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = ((i0 - _jit->pc.w) >> 2) - 2; - if (w >= -32768 && w <= 32767) - B_N(w, _R0_REGNO); + jit_word_t d, w; + w = _jit->pc.w; + d = ((i0 - w) >> 2) - 2; + if (d >= -32768 && d <= 32767) + B_N(d, _R0_REGNO); else { - movi(_R1_REGNO, w); + movi(_R1_REGNO, d); BV_N(_R0_REGNO, _R1_REGNO); } + return (w); } static jit_word_t diff --git a/deps/lightning/lib/jit_hppa-fpu.c b/deps/lightning/lib/jit_hppa-fpu.c index 6b2838d1..ed141a73 100644 --- a/deps/lightning/lib/jit_hppa-fpu.c +++ b/deps/lightning/lib/jit_hppa-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_hppa-sz.c b/deps/lightning/lib/jit_hppa-sz.c index 33ac908d..e41f89cb 100644 --- a/deps/lightning/lib/jit_hppa-sz.c +++ b/deps/lightning/lib/jit_hppa-sz.c @@ -3,9 +3,10 @@ #define JIT_INSTR_MAX 196 0, /* data */ 0, /* live */ - 0, /* align */ + 28, /* align */ 0, /* save */ 0, /* load */ + 0, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -14,7 +15,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,8 +26,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 20, /* va_arg_d */ @@ -97,12 +113,17 @@ 8, /* movi */ 12, /* movnr */ 12, /* movzr */ + 88, /* casr */ + 96, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 12, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -195,13 +216,37 @@ 40, /* callr */ 44, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, 
/* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -401,9 +446,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 28, /* bswapr_us */ - 68, /* bswapr_ui */ - 0, /* bswapr_ul */ - 88, /* casr */ - 96, /* casi */ + 160, /* clo */ + 140, /* clz */ + 164, /* cto */ + 144, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_hppa.c b/deps/lightning/lib/jit_hppa.c index 2c826d83..d3c5ef7f 100644 --- a/deps/lightning/lib/jit_hppa.c +++ b/deps/lightning/lib/jit_hppa.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -245,18 +245,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); + jit_code_inc_synth_w(code, u); jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -310,7 +310,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - assert(u->code == jit_code_arg || + assert((u->code >= jit_code_arg_c && u->code <= jit_code_arg) || u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_reg_p(u->u.w)); } @@ -343,17 +343,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= 
jit_code_arg); +#endif _jitc->function->self.size -= sizeof(jit_word_t); if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else offset = _jitc->function->self.size; - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -406,7 +410,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (v->u.w >= 0) jit_extr_c(u, _R26 - v->u.w); @@ -418,7 +422,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (v->u.w >= 0) jit_extr_uc(u, _R26 - v->u.w); @@ -430,7 +434,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (v->u.w >= 0) jit_extr_s(u, _R26 - v->u.w); @@ -442,7 +446,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (v->u.w >= 0) jit_extr_us(u, _R26 - v->u.w); @@ -454,7 +458,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (v->u.w >= 0) jit_movr(u, _R26 - v->u.w); @@ -464,10 +468,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void 
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (v->u.w >= 0) jit_movr(_R26 - v->u.w, u); else @@ -476,11 +480,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (v->u.w >= 0) jit_movi(_R26 - v->u.w, u); else { @@ -575,10 +579,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); _jitc->function->call.size -= sizeof(jit_word_t); if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -591,11 +595,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); _jitc->function->call.size -= sizeof(jit_word_t); if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -859,6 +863,7 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -979,6 +984,9 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: 
case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1060,6 +1068,14 @@ _emit_code(jit_state_t *_jit) break; case_rr(neg,); case_rr(com,); +#define clor(r0, r1) fallback_clo(r0, r1) +#define clzr(r0, r1) fallback_clz(r0, r1) +#define ctor(r0, r1) fallback_cto(r0, r1) +#define ctzr(r0, r1) fallback_ctz(r0, r1) + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rr(ext, _c); case_rr(ext, _uc); case_rr(ext, _s); @@ -1339,7 +1355,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (word >= -32768 && word <= 32767) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1368,6 +1389,7 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1388,6 +1410,18 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
+ * Note that for hppa use '-' instead of '+' as hppa + * stack grows up */ + undo.func.self.aoff = _jitc->function->frame - + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1409,24 +1443,37 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: case jit_code_getarg_s: case jit_code_getarg_us: case jit_code_getarg_i: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case 
jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_ia64-cpu.c b/deps/lightning/lib/jit_ia64-cpu.c index 068bc07e..98a10c39 100644 --- a/deps/lightning/lib/jit_ia64-cpu.c +++ b/deps/lightning/lib/jit_ia64-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -1301,6 +1301,16 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define nei(r0,r1,i0) _nei(_jit,r0,r1,i0) static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); +#define bitswap(r0, r1) _bitswap(_jit, r0, r1) +static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); +#define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +#define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +#define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +#define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); #define negr(r0,r1) subr(r0,0,r1) #define comr(r0,r1) ANDCMI(r0,-1,r1) #define movr(r0,r1) _movr(_jit,r0,r1) @@ -1500,7 +1510,7 @@ static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); #define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); #define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t 
_jmpi_p(jit_state_t*,jit_word_t); #define callr(r0) _callr(_jit,r0) @@ -2456,7 +2466,7 @@ _I9(jit_state_t *_jit, jit_word_t _p, TSTREG1(r3); TSTPRED(_p); TSTREG1(r1); - inst((7L<<37)|(1L<<34)|(1L<<34)|(1L<<33)| + inst((7L<<37)|(1L<<34)|(1L<<33)| (x2<<30)|(1L<<28)|(r3<<20)|(r1<<6)|_p, INST_I); SETREG(r1); } @@ -3465,6 +3475,94 @@ _nop(jit_state_t *_jit, jit_int32_t i0) assert(i0 == 0); } +static void +_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1, t2, t3, t4; + movr(r0, r1); + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + t2 = jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); + rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); + rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); + rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 
0x00ff00ffL : 0x00ff00ff00ff00ffL); + rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), 0x0000ffff0000ffffL); + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ + lshi(rn(t2), r0, 32); /* t2 = v << 32 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) + CLZ(r0, r1); + else + fallback_clz(r0, r1); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) { + comr(r0, r1); + clzr(r0, r0); + } + else + fallback_clo(r0, r1); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) { + bitswap(r0, r1); + clor(r0, r0); + } + else + fallback_cto(r0, r1); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.clz) { + bitswap(r0, r1); + clzr(r0, r0); + } + else + fallback_ctz(r0, r1); +} + static void _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -5145,16 +5243,18 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) BR(BR_6); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t d; + jit_word_t d, w; sync(); - d = ((jit_word_t)i0 - _jit->pc.w) >> 4; + w = _jit->pc.w; + d = ((jit_word_t)i0 - w) >> 4; if (d >= -16777216 && d <= 16777215) BRI(d); else BRL(d); + return (w); } static jit_word_t @@ -5400,14 +5500,16 @@ _patch_at(jit_state_t *_jit, jit_code_t code, i1 = (ic >> 61) & 0x1L; i41 = (ic >> 22) & 0x1ffffffffffL; i20 = ic & 0xfffffL; - assert((tm & ~1) == 
TM_M_L_X_ && + if (!((tm & ~1) == TM_M_L_X_ && (s2 & 0xfL<<37) == (0xcL<<37) && - s0 == nop_m); + s0 == nop_m)) + goto short_jump; s1 = i41; s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12); s2 |= (i1<<36)|(i20<<13); break; default: + short_jump: /* Only B1 in slot 0 expected due to need to either * a stop to update predicates, or a sync before * unconditional short branch */ diff --git a/deps/lightning/lib/jit_ia64-fpu.c b/deps/lightning/lib/jit_ia64-fpu.c index 344977ea..f0fb32c7 100644 --- a/deps/lightning/lib/jit_ia64-fpu.c +++ b/deps/lightning/lib/jit_ia64-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_ia64-sz.c b/deps/lightning/lib/jit_ia64-sz.c index e65da549..e1d973c7 100644 --- a/deps/lightning/lib/jit_ia64-sz.c +++ b/deps/lightning/lib/jit_ia64-sz.c @@ -1,10 +1,11 @@ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 224 +#define JIT_INSTR_MAX 608 0, /* data */ 0, /* live */ - 0, /* align */ + 48, /* align */ 0, /* save */ 0, /* load */ + 16, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,8 +25,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 16, /* va_start */ 32, /* va_arg */ 32, /* va_arg_d */ @@ -96,12 +112,17 @@ 16, /* movi */ 16, /* movnr */ 16, /* movzr */ + 48, /* casr */ + 64, /* casi */ 16, /* extr_c */ 16, /* extr_uc */ 16, /* 
extr_s */ 16, /* extr_us */ 16, /* extr_i */ 16, /* extr_ui */ + 32, /* bswapr_us */ + 32, /* bswapr_ui */ + 16, /* bswapr_ul */ 32, /* htonr_us */ 32, /* htonr_ui */ 16, /* htonr_ul */ @@ -194,13 +215,37 @@ 32, /* callr */ 48, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -400,9 +445,8 @@ 0, /* movi_d_ww */ 16, /* movr_d_w */ 32, /* movi_d_w */ - 32, /* bswapr_us */ - 32, /* bswapr_ui */ - 16, /* bswapr_ul */ - 48, /* casr */ - 64, /* casi */ + 608, /* clo */ + 544, /* clz */ + 608, /* cto */ + 544, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ia64.c b/deps/lightning/lib/jit_ia64.c index 1c35fb16..29682785 100644 --- a/deps/lightning/lib/jit_ia64.c +++ b/deps/lightning/lib/jit_ia64.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -58,6 +58,7 @@ extern void __clear_cache(void *, void *); /* * Initialization */ +jit_cpu_t jit_cpu; jit_register_t _rvs[] = { /* Always 0 */ { 0, "r0" }, @@ -239,6 +240,11 @@ jit_register_t _rvs[] = { void jit_get_cpu(void) { + jit_word_t clz = -1; + __asm__ volatile("tf.nz.unc p6,p7=32;(p6)mov %0=1;(p7)mov %0=0" + : "=r" (clz)); + assert(clz == 0 || clz == 1); + jit_cpu.clz = clz; } void @@ -345,18 +351,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); + jit_code_inc_synth_w(code, u); jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -410,9 +416,10 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - assert(u->code == jit_code_arg || - u->code == jit_code_arg_f || u->code == jit_code_arg_d); - return (jit_arg_reg_p(u->u.w)); + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) + return (jit_arg_reg_p(u->u.w)); + assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); + return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8)); } void @@ -442,18 +449,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = 
jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -508,7 +519,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _R32 + v->u.w); @@ -520,7 +531,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _R32 + v->u.w); @@ -532,7 +543,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _R32 + v->u.w); @@ -544,7 +555,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _R32 + v->u.w); @@ -556,7 +567,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, _R32 + v->u.w); @@ -568,7 +579,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _R32 
+ v->u.w); @@ -580,7 +591,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _R32 + v->u.w); @@ -590,10 +601,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_R32 + v->u.w, u); else @@ -602,11 +613,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_R32 + v->u.w, u); else { @@ -713,10 +724,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_OUT0 + _jitc->function->call.argi, u); @@ -730,11 +741,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if 
(jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_OUT0 + _jitc->function->call.argi, u); @@ -973,6 +984,7 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1122,6 +1134,10 @@ _emit_code(jit_state_t *_jit) if (node->u.w > 8) nop(node->u.w - 8); break; + case jit_code_skip: + sync(); + nop((node->u.w + 7) & ~7); + break; case jit_code_note: case jit_code_name: sync(); node->u.w = _jit->pc.w; @@ -1177,6 +1193,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case jit_code_casr: casr(rn(node->u.w), rn(node->v.w), rn(node->w.q.l), rn(node->w.q.h)); @@ -1504,7 +1524,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (word >= -16777216 && word <= 16777215) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1533,6 +1558,7 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1571,6 +1597,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1599,14 +1635,21 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1614,10 +1657,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case 
jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_loongarch-cpu.c b/deps/lightning/lib/jit_loongarch-cpu.c index 052d9ac5..ab058521 100644 --- a/deps/lightning/lib/jit_loongarch-cpu.c +++ b/deps/lightning/lib/jit_loongarch-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Free Software Foundation, Inc. + * Copyright (C) 2022-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -31,7 +31,6 @@ # define _RA_REGNO 1 # define _SP_REGNO 3 # define _FP_REGNO 22 -# define stack_framesize 160 # define ldr(u, v) ldr_l(u, v) # define ldi(u, v) ldi_l(u, v) # define ldxi(u, v, w) ldxi_l(u, v, w) @@ -335,6 +334,10 @@ static void _oj26(jit_state_t*, jit_int32_t,jit_int32_t); # define nop(i0) _nop(_jit, i0) # define comr(r0, r1) NOR(r0, r1, r1) # define negr(r0, r1) subr(r0, _ZERO_REGNO, r1) +# define clor(r0, r1) CLO_D(r0, r1) +# define clzr(r0, r1) CLZ_D(r0, r1) +# define ctor(r0, r1) CTO_D(r0, r1) +# define ctzr(r0, r1) CTZ_D(r0, r1) static void _nop(jit_state_t*,jit_int32_t); # define movr(r0, r1) _movr(_jit, r0, r1) static void _movr(jit_state_t*, jit_int32_t, jit_int32_t); @@ -580,7 +583,7 @@ static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t); # define jmpr(r0) JIRL(_ZERO_REGNO, r0, 0) # define jmpi(i0) _jmpi(_jit, i0) -static void _jmpi(jit_state_t*, jit_word_t); +static jit_word_t _jmpi(jit_state_t*, jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit, i0) static jit_word_t _jmpi_p(jit_state_t*, jit_word_t); # define boaddr(i0, r0, r1) _boaddr(_jit, i0, r0, r1) @@ -625,7 +628,7 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); # define callr(r0) JIRL(_RA_REGNO, r0, 0) # define calli(i0) _calli(_jit, i0) -static void _calli(jit_state_t*, jit_word_t); +static jit_word_t _calli(jit_state_t*, jit_word_t); # define calli_p(i0) _calli_p(_jit, i0) static jit_word_t _calli_p(jit_state_t*, jit_word_t); # define prolog(i0) _prolog(_jit, i0) @@ -2134,15 +2137,17 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) return (w); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = (i0 - _jit->pc.w) >> 2; + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; if (can_sign_extend_si26_p(i0)) - 
B(w); + B(d); else - (void)jmpi_p(i0); + w = jmpi_p(i0); + return (w); } static jit_word_t @@ -2501,15 +2506,17 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) return (w); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = (i0 - _jit->pc.w) >> 2; + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; if (can_sign_extend_si26_p(i0)) - BL(w); + BL(d); else - (void)calli_p(i0); + w = calli_p(i0); + return (w); } static jit_word_t @@ -2527,9 +2534,10 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { - jit_int32_t reg; + jit_int32_t reg, offs; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); if (_jitc->function->assume_frame) return; @@ -2540,44 +2548,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node) _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; - subi(_SP_REGNO, _SP_REGNO, stack_framesize); - stxi(0, _SP_REGNO, _RA_REGNO); - stxi(8, _SP_REGNO, _FP_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _S0)) - stxi(16, _SP_REGNO, rn(_S0)); - if (jit_regset_tstbit(&_jitc->function->regset, _S1)) - stxi(24, _SP_REGNO, rn(_S1)); - if (jit_regset_tstbit(&_jitc->function->regset, _S2)) - stxi(32, _SP_REGNO, rn(_S2)); - if (jit_regset_tstbit(&_jitc->function->regset, _S3)) - stxi(40, _SP_REGNO, rn(_S3)); - if (jit_regset_tstbit(&_jitc->function->regset, _S4)) - stxi(48, _SP_REGNO, rn(_S4)); - if (jit_regset_tstbit(&_jitc->function->regset, _S5)) - stxi(56, _SP_REGNO, rn(_S5)); - if (jit_regset_tstbit(&_jitc->function->regset, _S6)) - stxi(64, _SP_REGNO, rn(_S6)); - if (jit_regset_tstbit(&_jitc->function->regset, _S7)) - stxi(72, _SP_REGNO, rn(_S7)); - if (jit_regset_tstbit(&_jitc->function->regset, _S8)) - stxi(80, _SP_REGNO, 
rn(_S8)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS0)) - stxi_d(88, _SP_REGNO, rn(_FS0)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS1)) - stxi_d(96, _SP_REGNO, rn(_FS1)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS2)) - stxi_d(104, _SP_REGNO, rn(_FS2)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS3)) - stxi_d(112, _SP_REGNO, rn(_FS3)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS4)) - stxi_d(120, _SP_REGNO, rn(_FS4)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS5)) - stxi_d(128, _SP_REGNO, rn(_FS5)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS6)) - stxi_d(136, _SP_REGNO, rn(_FS6)); - if (jit_regset_tstbit(&_jitc->function->regset, _FS7)) - stxi_d(144, _SP_REGNO, rn(_FS7)); - movr(_FP_REGNO, _SP_REGNO); + + if (_jitc->function->stack) + _jitc->function->need_stack = 1; + if (!_jitc->function->need_frame && !_jitc->function->need_stack) { + /* check if any callee save register needs to be saved */ + for (reg = 0; reg < _jitc->reglen; ++reg) + if (jit_regset_tstbit(&_jitc->function->regset, reg) && + (_rvs[reg].spec & jit_class_sav)) { + _jitc->function->need_stack = 1; + break; + } + } + + if (_jitc->function->need_frame || _jitc->function->need_stack) + subi(_SP_REGNO, _SP_REGNO, jit_framesize()); + if (_jitc->function->need_frame) { + stxi(0, _SP_REGNO, _RA_REGNO); + stxi(8, _SP_REGNO, _FP_REGNO); + } + /* callee save registers */ + for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + stxi(offs, _SP_REGNO, rn(iregs[reg])); + offs += sizeof(jit_word_t); + } + } + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + stxi_d(offs, _SP_REGNO, rn(fregs[reg])); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->need_frame) + movr(_FP_REGNO, _SP_REGNO); if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); if (_jitc->function->allocar) 
{ @@ -2588,7 +2593,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) } if (_jitc->function->self.call & jit_call_varargs) { for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) - stxi(stack_framesize - ((8 - reg) * 8), + stxi(jit_framesize() - ((8 - reg) * 8), _FP_REGNO, rn(JIT_RA0 - reg)); } } @@ -2596,46 +2601,31 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg, offs; if (_jitc->function->assume_frame) return; - movr(_SP_REGNO, _FP_REGNO); - ldxi(_RA_REGNO, _SP_REGNO, 0); - ldxi(_FP_REGNO, _SP_REGNO, 8); - if (jit_regset_tstbit(&_jitc->function->regset, _S0)) - ldxi(rn(_S0), _SP_REGNO, 16); - if (jit_regset_tstbit(&_jitc->function->regset, _S1)) - ldxi(rn(_S1), _SP_REGNO, 24); - if (jit_regset_tstbit(&_jitc->function->regset, _S2)) - ldxi(rn(_S2), _SP_REGNO, 32); - if (jit_regset_tstbit(&_jitc->function->regset, _S3)) - ldxi(rn(_S3), _SP_REGNO, 40); - if (jit_regset_tstbit(&_jitc->function->regset, _S4)) - ldxi(rn(_S4), _SP_REGNO, 48); - if (jit_regset_tstbit(&_jitc->function->regset, _S5)) - ldxi(rn(_S5), _SP_REGNO, 56); - if (jit_regset_tstbit(&_jitc->function->regset, _S6)) - ldxi(rn(_S6), _SP_REGNO, 64); - if (jit_regset_tstbit(&_jitc->function->regset, _S7)) - ldxi(rn(_S7), _SP_REGNO, 72); - if (jit_regset_tstbit(&_jitc->function->regset, _S8)) - ldxi(rn(_S8), _SP_REGNO, 80); - if (jit_regset_tstbit(&_jitc->function->regset, _FS0)) - ldxi_d(rn(_FS0), _SP_REGNO, 88); - if (jit_regset_tstbit(&_jitc->function->regset, _FS1)) - ldxi_d(rn(_FS1), _SP_REGNO, 96); - if (jit_regset_tstbit(&_jitc->function->regset, _FS2)) - ldxi_d(rn(_FS2), _SP_REGNO, 104); - if (jit_regset_tstbit(&_jitc->function->regset, _FS3)) - ldxi_d(rn(_FS3), _SP_REGNO, 112); - if (jit_regset_tstbit(&_jitc->function->regset, _FS4)) - ldxi_d(rn(_FS4), _SP_REGNO, 120); - if (jit_regset_tstbit(&_jitc->function->regset, _FS5)) - ldxi_d(rn(_FS5), _SP_REGNO, 128); - if (jit_regset_tstbit(&_jitc->function->regset, 
_FS6)) - ldxi_d(rn(_FS6), _SP_REGNO, 136); - if (jit_regset_tstbit(&_jitc->function->regset, _FS7)) - ldxi_d(rn(_FS7), _SP_REGNO, 144); - addi(_SP_REGNO, _SP_REGNO, stack_framesize); + if (_jitc->function->need_frame) { + movr(_SP_REGNO, _FP_REGNO); + ldxi(_RA_REGNO, _SP_REGNO, 0); + ldxi(_FP_REGNO, _SP_REGNO, 8); + } + + /* callee save registers */ + for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + ldxi(rn(iregs[reg]), _SP_REGNO, offs); + offs += sizeof(jit_word_t); + } + } + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + ldxi_d(rn(fregs[reg]), _SP_REGNO, offs); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->need_frame || _jitc->function->need_stack) + addi(_SP_REGNO, _SP_REGNO, jit_framesize()); JIRL(_ZERO_REGNO, _RA_REGNO, 0); } @@ -2645,9 +2635,9 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) assert(_jitc->function->self.call & jit_call_varargs); /* Initialize va_list to the first stack argument. */ if (jit_arg_reg_p(_jitc->function->vagp)) - addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8)); + addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8)); else - addi(r0, _FP_REGNO, _jitc->function->self.size); + addi(r0, _FP_REGNO, jit_selfsize()); } static void diff --git a/deps/lightning/lib/jit_loongarch-fpu.c b/deps/lightning/lib/jit_loongarch-fpu.c index 5874afde..2871de33 100644 --- a/deps/lightning/lib/jit_loongarch-fpu.c +++ b/deps/lightning/lib/jit_loongarch-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Free Software Foundation, Inc. + * Copyright (C) 2022-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_loongarch-sz.c b/deps/lightning/lib/jit_loongarch-sz.c index 2490cfa4..4b950471 100644 --- a/deps/lightning/lib/jit_loongarch-sz.c +++ b/deps/lightning/lib/jit_loongarch-sz.c @@ -5,6 +5,7 @@ 28, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,8 +25,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -96,29 +112,34 @@ 16, /* movi */ 12, /* movnr */ 12, /* movzr */ + 32, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ 8, /* htonr_us */ 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 16, /* ldi_c */ + 20, /* ldi_c */ 4, /* ldr_uc */ - 16, /* ldi_uc */ + 20, /* ldi_uc */ 4, /* ldr_s */ - 16, /* ldi_s */ + 20, /* ldi_s */ 4, /* ldr_us */ - 16, /* ldi_us */ + 20, /* ldi_us */ 4, /* ldr_i */ - 16, /* ldi_i */ + 20, /* ldi_i */ 4, /* ldr_ui */ - 16, /* ldi_ui */ + 20, /* ldi_ui */ 4, /* ldr_l */ - 16, /* ldi_l */ + 20, /* ldi_l */ 4, /* ldxr_c */ 16, /* ldxi_c */ 4, /* ldxr_uc */ @@ -134,13 +155,13 @@ 4, /* ldxr_l */ 16, /* ldxi_l */ 4, /* str_c */ - 16, /* sti_c */ + 20, /* sti_c */ 4, /* str_s */ - 16, /* sti_s */ + 20, /* sti_s */ 4, /* str_i */ - 16, /* sti_i */ + 20, /* sti_i */ 4, /* str_l */ - 16, /* sti_l */ + 20, /* sti_l */ 
4, /* stxr_c */ 16, /* stxi_c */ 4, /* stxr_s */ @@ -194,13 +215,37 @@ 4, /* callr */ 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -260,11 +305,11 @@ 4, /* movr_f */ 8, /* movi_f */ 4, /* ldr_f */ - 16, /* ldi_f */ + 20, /* ldi_f */ 4, /* ldxr_f */ 16, /* ldxi_f */ 4, /* str_f */ - 16, /* sti_f */ + 20, /* sti_f */ 4, /* stxr_f */ 16, /* stxi_f */ 8, /* bltr_f */ @@ -351,11 +396,11 @@ 4, /* movr_d */ 16, /* movi_d */ 4, /* ldr_d */ - 16, /* ldi_d */ + 20, /* ldi_d */ 4, /* ldxr_d */ 16, /* ldxi_d */ 4, /* str_d */ - 16, /* sti_d */ + 20, /* sti_d */ 4, /* stxr_d */ 16, /* stxi_d */ 8, /* bltr_d */ @@ -400,9 +445,8 @@ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ - 8, /* bswapr_us */ - 8, /* bswapr_ui */ - 4, /* bswapr_ul */ - 32, /* casr */ - 44, /* casi */ + 4, /* clo */ + 4, /* clz */ + 4, /* cto */ + 4, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_loongarch.c b/deps/lightning/lib/jit_loongarch.c index 78fac470..c9b5b8ca 100644 --- a/deps/lightning/lib/jit_loongarch.c +++ b/deps/lightning/lib/jit_loongarch.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Free Software Foundation, Inc. + * Copyright (C) 2022-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -17,6 +17,10 @@ * Paulo Cesar Pereira de Andrade */ +/* callee save + variadic arguments + * align16(ra+fp+s[0-8]+fs[0-7]) + align16(a[0-7]) */ +#define stack_framesize (144 + 64) + #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) #define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) @@ -28,6 +32,8 @@ typedef struct jit_pointer_t jit_va_list_t; /* * Prototypes */ +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); @@ -107,6 +113,14 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { + _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8 +}; + +static jit_int32_t fregs[] = { + _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7 +}; + /* * Implementation */ @@ -167,6 +181,7 @@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + jit_check_frame(); switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -215,20 +230,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -288,16 +301,17 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); - return (jit_arg_f_reg_p(u->u.w)); + return 
(jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8)); } void _jit_ellipsis(jit_state_t *_jit) { jit_inc_synth(ellipsis); + jit_check_frame(); if (_jitc->prepare) { jit_link_prepare(); assert(!(_jitc->function->call.call & jit_call_varargs)); @@ -321,19 +335,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -355,6 +373,7 @@ _jit_arg_f(jit_state_t *_jit) else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); @@ -378,6 +397,7 @@ _jit_arg_d(jit_state_t *_jit) else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); @@ -388,111 +408,129 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _A0 - v->u.w); - else - jit_ldxi_c(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == 
jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _A0 - v->u.w); - else - jit_ldxi_uc(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _A0 - v->u.w); - else - jit_ldxi_s(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _A0 - v->u.w); - else - jit_ldxi_us(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, _A0 - v->u.w); - else - jit_ldxi_i(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _A0 - v->u.w); - else - jit_ldxi_ui(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, 
jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _A0 - v->u.w); - else - jit_ldxi_l(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_A0 - v->u.w, u); - else - jit_stxi(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_A0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w, JIT_FP, regno); + node = jit_stxi(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -507,8 +545,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(u, _FA0 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8)); - else - jit_ldxi_f(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -521,8 +561,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(_FA0 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u); - else - jit_stxi_f(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u); + jit_link_alist(node); + } 
jit_dec_synth(); } @@ -534,18 +576,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) jit_inc_synth_fp(putargi_f, u, v); if (jit_arg_f_reg_p(v->u.w)) jit_movi_f(_FA0 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - union { - jit_float32_t f; - jit_int32_t i; - } uu; - uu.f = u; - jit_movi(JIT_RA0 - (v->u.w - 8), uu.i); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, JIT_FP, regno); + node = jit_stxi_f(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -560,8 +598,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(u, _FA0 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8)); - else - jit_ldxi_d(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -574,8 +614,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(_FA0 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u); - else - jit_stxi_d(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -587,28 +629,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) jit_inc_synth_dp(putargi_d, u, v); if (jit_arg_f_reg_p(v->u.w)) jit_movi_d(_FA0 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - union { - jit_float64_t d; - jit_int64_t w; - } uu; - uu.d = u; - jit_movi(JIT_RA0 - (v->u.w - 8), uu.w); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, JIT_FP, regno); + node = jit_stxi_d(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); } void 
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_A0 - _jitc->function->call.argi, u); @@ -617,16 +655,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) else { jit_stxi(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_A0 - _jitc->function->call.argi, u); @@ -638,6 +677,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) jit_stxi(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -660,6 +700,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) else { jit_stxi_f(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -686,6 +727,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -708,6 +750,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) else { jit_stxi_d(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -734,6 +777,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); 
+ jit_check_frame(); } jit_dec_synth(); } @@ -761,6 +805,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishr, r0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -778,6 +823,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishi, (jit_word_t)i0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -877,6 +923,7 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1015,6 +1062,9 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1062,6 +1112,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1383,6 +1437,7 @@ _emit_code(jit_state_t *_jit) case_brr(bunord, _d); case_brd(bunord); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); break; case jit_code_jmpi: @@ -1393,14 +1448,22 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (can_sign_extend_si26_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); jmpi(node->u.w); + } break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: @@ -1411,22 +1474,32 @@ _emit_code(jit_state_t *_jit) if (temp->flag & 
jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (can_sign_extend_si26_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); calli(node->u.w); + } break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif undo.patch_offset = _jitc->patches.offset; restart_function: + compute_framesize(); + patch_alist(0); _jitc->again = 0; prolog(node); break; @@ -1442,10 +1515,25 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + /* this will be recomputed but undo anyway to have it + * better self documented.*/ + undo.func.need_stack = _jitc->function->need_stack; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif _jitc->patches.offset = undo.patch_offset; + patch_alist(1); goto restart_function; } if (node->link && (word = _jit->pc.w & 3)) @@ -1488,11 +1576,18 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: case jit_code_arg_f: case 
jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1500,10 +1595,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1600,6 +1707,27 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) stxi_d(i0, rn(r0), rn(r1)); } +static void 
+_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + _jitc->framesize = 16; /* ra+fp */ + for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += sizeof(jit_word_t); + + for (reg = 0; reg < jit_size(fregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) + _jitc->framesize += sizeof(jit_float64_t); + + /* Space to store variadic arguments */ + if (_jitc->function->self.call & jit_call_varargs) + _jitc->framesize += (8 - _jitc->function->vagp) * 8; + + /* Make sure functions called have a 16 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 15) & -16; +} + static void _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) { diff --git a/deps/lightning/lib/jit_memory.c b/deps/lightning/lib/jit_memory.c index e4e5deb3..8e736da1 100644 --- a/deps/lightning/lib/jit_memory.c +++ b/deps/lightning/lib/jit_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_mips-cpu.c b/deps/lightning/lib/jit_mips-cpu.c index f52d6dc8..0b1b3b48 100644 --- a/deps/lightning/lib/jit_mips-cpu.c +++ b/deps/lightning/lib/jit_mips-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -27,11 +27,11 @@ typedef union { struct { jit_uint32_t _:16; jit_uint32_t b : 5; } ft; struct { jit_uint32_t _:11; jit_uint32_t b : 5; } rd; struct { jit_uint32_t _:11; jit_uint32_t b : 5; } fs; + struct { jit_uint32_t _: 7; jit_uint32_t b : 9; } i9; struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } ic; struct { jit_uint32_t _: 6; jit_uint32_t b : 5; } fd; - struct { jit_uint32_t _: 6; jit_uint32_t b : 10; } tr; - struct { jit_uint32_t _: 6; jit_uint32_t b : 20; } br; struct { jit_uint32_t b : 6; } tc; + struct { jit_uint32_t b : 5; } cn; struct { jit_uint32_t b : 11; } cc; struct { jit_uint32_t b : 16; } is; struct { jit_uint32_t b : 26; } ii; @@ -43,22 +43,19 @@ typedef union { struct { jit_uint32_t _:11; jit_uint32_t b : 5; } ft; struct { jit_uint32_t _:16; jit_uint32_t b : 5; } rd; struct { jit_uint32_t _:16; jit_uint32_t b : 5; } fs; + struct { jit_uint32_t _:16; jit_uint32_t b : 9; } i9; struct { jit_uint32_t _:21; jit_uint32_t b : 5; } ic; struct { jit_uint32_t _:21; jit_uint32_t b : 5; } fd; - struct { jit_uint32_t _:21; jit_uint32_t b : 10; } tr; - struct { jit_uint32_t _:21; jit_uint32_t b : 20; } br; struct { jit_uint32_t _:26; jit_uint32_t b : 6; } tc; + struct { jit_uint32_t _:27; jit_uint32_t b : 5; } cn; struct { jit_uint32_t _:21; jit_uint32_t b : 11; } cc; struct { jit_uint32_t _:16; jit_uint32_t b : 16; } is; struct { jit_uint32_t _: 6; jit_uint32_t b : 26; } ii; #endif int op; } jit_instr_t; -#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) -# define jit_mips2_p() 1 -#else -# define jit_mips2_p() 0 -#endif +#define jit_mips2_p() (jit_cpu.release >= 2) +#define jit_mips6_p() (jit_cpu.release >= 6) # define _ZERO_REGNO 0 # define _T0_REGNO 0x08 # define _T1_REGNO 0x09 @@ -90,24 +87,20 @@ typedef union { # define _F28_REGNO 28 # define _F30_REGNO 30 # if __WORDSIZE == 32 -# if NEW_ABI -# define stack_framesize 144 -# else -# define stack_framesize 112 -# endif # define ldr(u,v) ldr_i(u,v) # define ldi(u,v) ldi_i(u,v) # define 
ldxi(u,v,w) ldxi_i(u,v,w) # define sti(u,v) sti_i(u,v) # define stxi(u,v,w) stxi_i(u,v,w) # else -# define stack_framesize 144 # define ldr(u,v) ldr_l(u,v) # define ldi(u,v) ldi_l(u,v) # define ldxi(u,v,w) ldxi_l(u,v,w) # define sti(u,v) sti_l(u,v) # define stxi(u,v,w) stxi_l(u,v,w) # endif +/* can_relative_jump_p(im) => can_sign_extend_short_p(im << 2) */ +# define can_relative_jump_p(im) ((im) >= -130712 && (im) <= 131068) # define can_sign_extend_short_p(im) ((im) >= -32678 && (im) <= 32767) # define can_zero_extend_short_p(im) ((im) >= 0 && (im) <= 65535) # define is_low_mask(im) (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0) @@ -195,6 +188,8 @@ typedef union { # define MIPS_CT 0x06 # define MIPS_MTH 0x07 # define MIPS_BC 0x08 +# define MIPS_BC1EQZ 0x09 /* release 6 */ +# define MIPS_BC1NEZ 0x0d /* release 6 */ # define MIPS_WRPGPR 0x0e # define MIPS_BGZAL 0x11 # define MIPS_MFMC0 0x11 @@ -303,17 +298,32 @@ typedef union { # define MIPS_DSRA32 0x3f # define MIPS_SDBPP 0x3f # define ii(i) *_jit->pc.ui++ = i +# define instr(op) _instr(_jit, op) +static void _instr(jit_state_t*, jit_int32_t); +# define flush() _flush(_jit) +static void _flush(jit_state_t*); +# define pending() _pending(_jit) +static jit_int32_t _pending(jit_state_t*); +# define delay(op) _delay(_jit,op) +static void _delay(jit_state_t*,jit_int32_t); +# define jit_get_reg_for_delay_slot(mask, r0,r1) \ + _jit_get_reg_for_delay_slot(_jit,mask,r0,r1) +static jit_int32_t _jit_get_reg_for_delay_slot(jit_state_t*,jit_int32_t, + jit_int32_t, jit_int32_t); +# define hrrrit(hc,rs,rt,rd,im,tc) _hrrrit(_jit,hc,rs,rt,rd,im,tc) static void _hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t); -# define hrrrit(hc,rs,rt,rd,im,tc) _hrrrit(_jit,hc,rs,rt,rd,im,tc) # define hrrr_t(hc,rs,rt,rd,tc) hrrrit(hc,rs,rt,rd,0,tc) # define rrr_t(rs,rt,rd,tc) hrrr_t(0,rs,rt,rd,tc) # define hrri(hc,rs,rt,im) _hrri(_jit,hc,rs,rt,im) static void 
_hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); +# define hrri9(hc,rs,rt,i9,tc) _hrri9(_jit,hc,rs,rt,i9,tc) +static void _hrri9(jit_state_t*,jit_int32_t,jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t); # define hi(hc,im) _hi(_jit,hc,im) static void _hi(jit_state_t*,jit_int32_t,jit_int32_t); -# define NOP(i0) ii(0) +# define NOP(i0) instr(0) # define nop(i0) _nop(_jit,i0) static void _nop(jit_state_t*,jit_int32_t); # define h_ri(hc,rt,im) _hrri(_jit,hc,0,rt,im) @@ -327,13 +337,29 @@ static void _nop(jit_state_t*,jit_int32_t); # define DSUBU(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_DSUBU) # define MUL(rd,rs,rt) hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL) # define MULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT) +# define MUL_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 24) +# define MUH_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 24) # define MULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU) +# define MULU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 25) +# define MUHU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 25) # define DMULT(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT) +# define DMUL_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 28) +# define DMUH_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 28) # define DMULTU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU) +# define DMULU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 29) +# define DMUHU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 29) # define DIV(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV) +# define DIV_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 26) +# define MOD_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 26) # define DIVU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU) +# define DIVU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 27) +# define MODU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 27) # define DDIV(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV) +# define DDIV_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 30) +# define DMOD_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, 
rs, rt, rd, 3, 30) # define DDIVU(rs,rt) rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU) +# define DDIVU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 31) +# define DMODU_R6(rd,rs,rt) hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 31) # define SLLV(rd,rt,rs) rrr_t(rs,rt,rd,MIPS_SLLV) # define SLL(rd,rt,sa) rrit(rt,rd,sa,MIPS_SLL) # define DSLLV(rd,rt,rs) rrr_t(rs,rt,rd,MIPS_DSLLV) @@ -368,6 +394,7 @@ static void _nop(jit_state_t*,jit_int32_t); # define ANDI(rt,rs,im) hrri(MIPS_ANDI,rs,rt,im) # define OR(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_OR) # define ORI(rt,rs,im) hrri(MIPS_ORI,rs,rt,im) +# define NOR(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_NOR) # define XOR(rd,rs,rt) rrr_t(rs,rt,rd,MIPS_XOR) # define XORI(rt,rs,im) hrri(MIPS_XORI,rs,rt,im) # define LB(rt,of,rb) hrri(MIPS_LB,rb,rt,of) @@ -378,13 +405,17 @@ static void _nop(jit_state_t*,jit_int32_t); # define LWU(rt,of,rb) hrri(MIPS_LWU,rb,rt,of) # define LD(rt,of,rb) hrri(MIPS_LD,rb,rt,of) # define LL(rt,of,rb) hrri(MIPS_LL,rb,rt,of) +# define LL_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,54) # define LLD(rt,of,rb) hrri(MIPS_LLD,rb,rt,of) +# define LLD_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,55) # define SB(rt,of,rb) hrri(MIPS_SB,rb,rt,of) # define SH(rt,of,rb) hrri(MIPS_SH,rb,rt,of) # define SW(rt,of,rb) hrri(MIPS_SW,rb,rt,of) # define SD(rt,of,rb) hrri(MIPS_SD,rb,rt,of) # define SC(rt,of,rb) hrri(MIPS_SC,rb,rt,of) +# define SC_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,38) # define SCD(rt,of,rb) hrri(MIPS_SCD,rb,rt,of) +# define SCD_R6(rt,of,rb) hrri9(MIPS_SPECIAL3,rb,rt,of,39) # define WSBH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL) # define SEB(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL) # define SEH(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL) @@ -398,34 +429,73 @@ static void _nop(jit_state_t*,jit_int32_t); # define BGEZ(rs,im) hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im) # define BGTZ(rs,im) hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im) # define BNE(rs,rt,im) hrri(MIPS_BNE,rs,rt,im) +# define BGEZAL(rs,im) 
hrri(MIPS_REGIMM,rs,MIPS_BGEZAL,im) # define JALR(r0) hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR) -# if 1 /* supports MIPS32 R6 */ -# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR) -# else /* does not support MIPS32 R6 */ -# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR) +# if 1 /* This should work for mips r6 or older */ +# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR) +# else /* This should generate an illegal instruction in mips r6 */ +# define JR(r0) hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR) # endif +# define CLO_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x11) +# define DCLO_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x13) +# define CLZ_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x10) +# define DCLZ_R6(rd,rs) hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x12) +# define BITSWAP(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x20) +# define DBITSWAP(rd,rt) hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x24) +# define CLO(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLO) +# define DCLO(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLO) +# define CLZ(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLZ) +# define DCLZ(rd,rs) hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLZ) # define J(i0) hi(MIPS_J,i0) # define JAL(i0) hi(MIPS_JAL,i0) # define MOVN(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVN) # define MOVZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,MIPS_MOVZ) +# define SELEQZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,53) +# define SELNEZ(rd,rs,rt) hrrrit(0,rs,rt,rd,0,55) # define comr(r0,r1) xori(r0,r1,-1) # define negr(r0,r1) subr(r0,_ZERO_REGNO,r1) +# define bitswap(r0,r1) _bitswap(_jit, r0, r1); +static void _bitswap(jit_state_t*,jit_int32_t,jit_int32_t); +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void 
_ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # if __WORDSIZE == 32 # define addr(rd,rs,rt) ADDU(rd,rs,rt) # define addiu(r0,r1,i0) ADDIU(r0,r1,i0) # define subr(rd,rs,rt) SUBU(rd,rs,rt) # define mult(rs,rt) MULT(rs,rt) +# define mul_r6(rd,rs,rt) MUL_R6(rd,rs,rt) +# define muh_r6(rd,rs,rt) MUH_R6(rd,rs,rt) # define multu(rs,rt) MULTU(rs,rt) +# define mulu_r6(rd,rs,rt) MULU_R6(rd,rs,rt) +# define muhu_r6(rd,rs,rt) MUHU_R6(rd,rs,rt) # define div(rs,rt) DIV(rs,rt) # define divu(rs,rt) DIVU(rs,rt) +# define div_r6(rd,rs,rt) DIV_R6(rd,rs,rt) +# define divu_r6(rd,rs,rt) DIVU_R6(rd,rs,rt) +# define mod_r6(rd,rs,rt) MOD_R6(rd,rs,rt) +# define modu_r6(rd,rs,rt) MODU_R6(rd,rs,rt) # else # define addr(rd,rs,rt) DADDU(rd,rs,rt) # define addiu(r0,r1,i0) DADDIU(r0,r1,i0) # define subr(rd,rs,rt) DSUBU(rd,rs,rt) # define mult(rs,rt) DMULT(rs,rt) +# define mul_r6(rd,rs,rt) DMUL_R6(rd,rs,rt) +# define muh_r6(rd,rs,rt) DMUH_R6(rd,rs,rt) # define multu(rs,rt) DMULTU(rs,rt) +# define mulu_r6(rd,rs,rt) DMULU_R6(rd,rs,rt) +# define muhu_r6(rd,rs,rt) DMUHU_R6(rd,rs,rt) # define div(rs,rt) DDIV(rs,rt) # define divu(rs,rt) DDIVU(rs,rt) +# define div_r6(rd,rs,rt) DDIV_R6(rd,rs,rt) +# define divu_r6(rd,rs,rt) DDIVU_R6(rd,rs,rt) +# define mod_r6(rd,rs,rt) DMOD_R6(rd,rs,rt) +# define modu_r6(rd,rs,rt) DMODU_R6(rd,rs,rt) # endif # define extr(rd,rt,lsb,nb) _extr(_jit,rd,rt,lsb,nb) static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); @@ -526,8 +596,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); -# define movnr(r0,r1,r2) MOVN(r0, r1, r2) -# define movzr(r0,r1,r2) MOVZ(r0, r1, r2) +# define movnr(r0, r1, r2) _movnr(_jit, r0, r1, r2) +static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); +# define movzr(r0, r1, r2) _movzr(_jit, r0, r1, r2) +static void _movzr(jit_state_t*, 
jit_int32_t, jit_int32_t, jit_int32_t); # define casx(r0, r1, r2, r3, i0) _casx(_jit, r0, r1, r2, r3, i0) static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, jit_int32_t,jit_int32_t,jit_word_t); @@ -672,50 +744,44 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); #define nei(r0,r1,i0) _nei(_jit,r0,r1,i0) static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -#define bltr(i0,r0,r1) _bltr(_jit,i0,r0,r1) -static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define bltr_u(i0,r0,r1) _bltr_u(_jit,i0,r0,r1) -static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define blti(i0,r0,i1) _blti(_jit,i0,r0,i1) -static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define blti_u(i0,r0,i1) _blti_u(_jit,i0,r0,i1) -static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define bler(i0,r0,r1) _bler(_jit,i0,r0,r1) -static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define bler_u(i0,r0,r1) _bler_u(_jit,i0,r0,r1) -static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define blei(i0,r0,i1) _blei(_jit,i0,r0,i1) -static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define blei_u(i0,r0,i1) _blei_u(_jit,i0,r0,i1) -static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +#define bltr(i0,r0,r1) bger(i0,r1,r0) +#define bltr_u(i0,r0,r1) bger_u(i0,r1,r0) +#define blti(i0,r0,i1) _bgei(_jit,i0,r0,i1,0,1) +#define blti_u(i0,r0,i1) _bgei(_jit,i0,r0,i1,1,1) +#define bler(i0,r0,r1) _bgtr(_jit,i0,r1,r0,0,1) +#define bler_u(i0,r0,r1) _bgtr(_jit,i0,r1,r0,1,1) +#define blei(i0,r0,i1) _bgti(_jit,i0,r0,i1,0,1) +#define blei_u(i0,r0,i1) _bgti(_jit,i0,r0,i1,1,1) #define beqr(i0,r0,r1) _beqr(_jit,i0,r0,r1) static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define beqi(i0,r0,i1) _beqi(_jit,i0,r0,i1) 
static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define bger(i0,r0,r1) _bger(_jit,i0,r0,r1) -static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define bger_u(i0,r0,r1) _bger_u(_jit,i0,r0,r1) -static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define bgei(i0,r0,i1) _bgei(_jit,i0,r0,i1) -static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define bgei_u(i0,r0,i1) _bgei_u(_jit,i0,r0,i1) -static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define bgtr(i0,r0,r1) _bgtr(_jit,i0,r0,r1) -static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define bgtr_u(i0,r0,r1) _bgtr_u(_jit,i0,r0,r1) -static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); -#define bgti(i0,r0,i1) _bgti(_jit,i0,r0,i1) -static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); -#define bgti_u(i0,r0,i1) _bgti_u(_jit,i0,r0,i1) -static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); +#define bger(i0,r0,r1) _bger(_jit,i0,r0,r1,0) +#define bger_u(i0,r0,r1) _bger(_jit,i0,r0,r1,1) +static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t, + jit_bool_t); +#define bgei(i0,r0,i1) _bgei(_jit,i0,r0,i1,0,0) +#define bgei_u(i0,r0,i1) _bgei(_jit,i0,r0,i1,1,0) +static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t, + jit_bool_t,jit_bool_t); +#define bgtr(i0,r0,r1) _bgtr(_jit,i0,r0,r1,0,0) +#define bgtr_u(i0,r0,r1) _bgtr(_jit,i0,r0,r1,1,0) +static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t, + jit_bool_t,jit_bool_t); +#define bgti(i0,r0,i1) _bgti(_jit,i0,r0,i1,0,0) +#define bgti_u(i0,r0,i1) _bgti(_jit,i0,r0,i1,1,0) +static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t, + jit_bool_t,jit_bool_t); #define bner(i0,r0,r1) _bner(_jit,i0,r0,r1) static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); #define bnei(i0,r0,i1) _bnei(_jit,i0,r0,i1) 
static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); # define jmpr(r0) _jmpr(_jit,r0) static void _jmpr(jit_state_t*,jit_int32_t); -# define jmpi(i0) _jmpi(_jit,i0) -static jit_word_t _jmpi(jit_state_t*,jit_word_t); +# define jmpi(i0,patch) _jmpi(_jit,i0,patch) +static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t); +# define jmpi_p(i0) _jmpi_p(_jit,i0) +static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define boaddr(i0,r0,r1) _boaddr(_jit,i0,r0,r1) static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define boaddi(i0,r0,i1) _boaddi(_jit,i0,r0,i1) @@ -758,8 +824,8 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); # define callr(r0) _callr(_jit,r0) static void _callr(jit_state_t*,jit_int32_t); -# define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +# define calli(i0,i1) _calli(_jit,i0,i1) +static jit_word_t _calli(jit_state_t*,jit_word_t,jit_bool_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(node) _prolog(_jit,node) @@ -774,9 +840,584 @@ static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t); #define patch_at(jump,label) _patch_at(_jit,jump,label) static void _patch_at(jit_state_t*,jit_word_t,jit_word_t); +/* definitions used by jit_get_reg_for_delay_slot() */ +#include "jit_mips-fpu.c" #endif #if CODE +static void +_instr(jit_state_t *_jit, jit_int32_t op) +{ + if (_jitc->inst.pend) + ii(_jitc->inst.op); + else + _jitc->inst.pend = 1; + _jitc->inst.op = op; +} + +static void +_flush(jit_state_t *_jit) +{ + if (_jitc->inst.pend) { + ii(_jitc->inst.op); + _jitc->inst.pend = 0; + } +} + +static jit_int32_t +_pending(jit_state_t *_jit) +{ + jit_int32_t op; + if (_jitc->inst.pend) { + op = _jitc->inst.op; + _jitc->inst.pend = 0; + } + else + op = 0; + return (op); 
+} + +static void +_delay(jit_state_t *_jit, jit_int32_t op) +{ + assert(_jitc->inst.pend); + ii(_jitc->inst.op); + _jitc->inst.pend = 0; + ii(op); +} + +static jit_int32_t +_jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask, + jit_int32_t reg0, jit_int32_t reg1) +{ + jit_instr_t i; + jit_int32_t reg, r0, r1, r2, regs[3]; + /* If will emit a pending instruction */ + if (_jitc->inst.pend) + i.op = _jitc->inst.op; + /* Else if at least one instruction emited, check it */ + else if (_jit->pc.uc > _jit->code.ptr) + i.op = _jit->pc.ui[-1]; + /* Else, a nop */ + else + i.op = 0; + regs[0] = regs[1] = regs[2] = -1; + switch (i.hc.b) { + case MIPS_SPECIAL: /* 00 */ + switch (i.tc.b) { + case MIPS_SLLV: /* 04 */ + case MIPS_SRLV: /* 06 */ + case MIPS_SRAV: /* 07 */ + case MIPS_DSLLV: /* 14 */ + case MIPS_DSRLV: /* 16 */ + case MIPS_DSRAV: /* 17 */ + case MIPS_ADDU: /* 21 */ + case MIPS_SUBU: /* 23 */ + case MIPS_AND: /* 24 */ + case MIPS_OR: /* 25 */ + case MIPS_XOR: /* 26 */ + case MIPS_NOR: /* 27 */ + case MIPS_SLT: /* 2a */ + case MIPS_SLTU: /* 2b */ + case MIPS_DADDU: /* 2d */ + case MIPS_DSUBU: /* 2f */ + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = i.rd.b; + } + break; + /* MUL MUH */ + case MIPS_MULT: /* 18 */ + /* MULU MUHU */ + case MIPS_MULTU: /* 19 */ + /* DIV MOD */ + case MIPS_DIV: /* 1a */ + /* DIVU MODU */ + case MIPS_DIVU: /* 1b */ + /* DMUL DMUH */ + case MIPS_DMULT: /* 1c */ + /* DMULU DMUHU */ + case MIPS_DMULTU: /* 1d */ + /* DDIV DMOD */ + case MIPS_DDIV: /* 1e */ + /* DDIVU DMODU */ + case MIPS_DDIVU: /* 1f */ + if (jit_mips6_p()) { + assert(i.ic.b == 2 || i.ic.b == 3); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = i.rd.b; + } + } + else { + assert(i.rd.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + } + break; + /* CLZ */ + case MIPS_MFHI: /* 10 */ + /* CLO */ + case MIPS_MTHI: /* 11 */ + /* DCLZ */ + case MIPS_MFLO: 
/* 12 */ + /* DCLO */ + case MIPS_MTLO: /* 13 */ + if (mask & jit_class_gpr) { + if (jit_mips6_p()) { + assert(i.ic.b == 1); + regs[1] = i.rd.b; + } + else { + assert(!i.rs.b && !i.rt.b); + regs[1] = 0; + } + regs[0] = i.rd.b; + regs[1] = 0; + } + break; + case MIPS_JR: /* 08 */ + assert(!jit_mips6_p()); + case MIPS_JALR: /* 09 */ + /* check for proper/known encondig */ + assert(!i.ic.b); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = i.rd.b; + } + break; + case MIPS_SLL: /* 00 */ + case MIPS_SRL: /* 02 */ + case MIPS_SRA: /* 03 */ + case MIPS_DSLL: /* 38 */ + case MIPS_DSRL: /* 3a */ + case MIPS_DSRA: /* 3b */ + case MIPS_DSLL32: /* 3c */ + case MIPS_DSRA32: /* 3f */ + case MIPS_DSRL32: /* 3e */ + /* shift (or rotate if i.rs.b == 1) */ + assert(i.rs.b == 0 || i.rs.b == 1); + if (mask & jit_class_gpr) { + regs[0] = i.rt.b; + regs[1] = i.rd.b; + regs[2] = 0; + } + break; + case MIPS_SYNC: /* 0f */ + assert(i.rs.b == 0 && i.rt.b == 0 && i.rd.b == 0); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[1] = 0; + break; + case MIPS_MOVZ: /* 0a */ + case MIPS_MOVN: /* 0b */ + assert(!jit_mips6_p() && i.ic.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = i.rd.b; + } + break; + /* SELEQZ */ + case 53: /* 35 */ + /* SELNEZ */ + case 55: /* 37 */ + assert(jit_mips6_p() && i.ic.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = i.rd.b; + } + break; + default: + abort(); + } + break; + case MIPS_REGIMM: /* 01 */ + switch (i.rt.b) { + case MIPS_BLTZ: /* 00 */ + case MIPS_BGEZ: /* 01 */ + case MIPS_BGEZAL: /* 11 */ + break; + default: + abort(); + } + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = regs[2] = 0; + } + break; + case MIPS_J: /* 02 */ + case MIPS_JAL: /* 03 */ + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + break; + case MIPS_LUI: /* 0f */ + assert(i.rs.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rt.b; + 
regs[1] = regs[1] = 0; + } + break; + case MIPS_SPECIAL2: /* 1c */ + switch (i.tc.b) { + case MIPS_CLZ: /* 20 */ + case MIPS_CLO: /* 21 */ + case MIPS_DCLZ: /* 24 */ + case MIPS_DCLO: /* 25 */ + assert(!jit_mips6_p() && i.rt.b == i.rd.b && i.ic.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rd.b; + regs[2] = 0; + } + break; + case MIPS_MUL: /* 02 */ + assert(jit_mips2_p() && i.ic.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = i.rd.b; + } + break; + default: + abort(); + } + break; + case MIPS_SPECIAL3: /* 1f */ + switch (i.tc.b) { + case MIPS_EXT: /* 00 */ + case MIPS_DEXTM: /* 01 */ + case MIPS_DEXTU: /* 02 */ + case MIPS_DEXT: /* 03 */ + case MIPS_INS: /* 04 */ + case MIPS_DINSM: /* 05 */ + case MIPS_DINSU: /* 06 */ + case MIPS_DINS: /* 07 */ + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + break; + /* BITSWAP */ + case MIPS_BSHFL: /* 20 */ + /* DBITSWAP */ + case MIPS_DBSHFL: /* 24 */ + switch (i.ic.b) { + case MIPS_WSBH: /* 02 */ + case MIPS_SEB: /* 10 */ + case MIPS_SEH: /* 18 */ + if (mask & jit_class_gpr) { + regs[0] = i.rt.b; + regs[1] = i.rd.b; + regs[2] = 0; + } + break; + /* BITSWAP DBITSWAP */ + case 0: + assert(jit_mips6_p() && i.rt.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rd.b; + regs[2] = 0; + } + break; + default: + abort(); + } + break; + /* SC */ + case 38: /* 26 */ + /* SCD */ + case 39: /* 27 */ + /* LD */ + case 54: /* 36 */ + /* LLD */ + case 55: /* 37 */ + assert(jit_mips6_p()); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + break; + default: + abort(); + } + break; + case MIPS_COP1: /* 11 */ + switch (i.tc.b) { + case MIPS_ADD_fmt: /* 00 */ + switch (i.rs.b) { + case MIPS_MF: /* 00 */ + case MIPS_DMF: /* 01 */ + case MIPS_MFH: /* 03 */ + case MIPS_MT: /* 04 */ + case MIPS_DMT: /* 05 */ + case MIPS_MTH: /* 07 */ + assert(i.ic.b == 0); + if (mask & 
jit_class_gpr) { + regs[0] = i.rt.b; + regs[1] = regs[2] = 0; + } + else + regs[0] = i.rd.b; + break; + default: + goto three_fprs; + } + break; + case MIPS_SUB_fmt: /* 01 */ + case MIPS_MUL_fmt: /* 02 */ + case MIPS_DIV_fmt: /* 03 */ + three_fprs: + /* 10 */ + assert(i.rs.b == MIPS_fmt_S || + /* 11 */ + i.rs.b == MIPS_fmt_D); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else { + regs[0] = i.rt.b; + regs[1] = i.rd.b; + regs[2] = i.ic.b; + } + break; + case MIPS_SQRT_fmt: /* 04 */ + case MIPS_ABS_fmt: /* 05 */ + case MIPS_MOV_fmt: /* 06 */ + case MIPS_NEG_fmt: /* 07 */ + assert((i.rs.b == MIPS_fmt_S || i.rs.b == MIPS_fmt_D) && + i.rt.b == 0); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else { + regs[0] = i.rd.b; + regs[1] = i.ic.b; + } + break; + case MIPS_CVT_fmt_S: /* 20 */ + case MIPS_CVT_fmt_D: /* 21 */ + case MIPS_CVT_fmt_W: /* 24 */ + case MIPS_CVT_fmt_L: /* 25 */ + switch (i.rs.b) { + case MIPS_fmt_S:/* 10 */ + case MIPS_fmt_D:/* 11 */ + case MIPS_fmt_W:/* 14 */ + case MIPS_fmt_L:/* 15 */ + break; + default: + abort(); + } + assert(i.rt.b == 0); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else { + regs[0] = i.rd.b; + regs[1] = i.ic.b; + } + break; + case MIPS_cond_F: /* 30 */ + case MIPS_cond_UN: /* 31 */ + case MIPS_cond_EQ: /* 32 */ + case MIPS_cond_UEQ: /* 33 */ + case MIPS_cond_OLT: /* 34 */ + case MIPS_cond_ULT: /* 35 */ + case MIPS_cond_OLE: /* 36 */ + case MIPS_cond_ULE: /* 37 */ + case MIPS_cond_SF: /* 38 */ + case MIPS_cond_NGLE: /* 39 */ + case MIPS_cond_SEQ: /* 3a */ + case MIPS_cond_NGL: /* 3b */ + case MIPS_cond_LT: /* 3c */ + case MIPS_cond_NGE: /* 3d */ + case MIPS_cond_LE: /* 3e */ + case MIPS_cond_UGT: /* 3f */ + assert(!jit_mips6_p() && + /* 10 */ + (i.fm.b == MIPS_fmt_S || + /* 11 */ + i.fm.b == MIPS_fmt_D)); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else { + regs[0] = i.ft.b; + regs[1] = i.fs.b; + } + break; + default: + switch (i.ic.b) { + case 
MIPS_cmp_AF: /* 00 */ + case MIPS_cmp_UN: /* 01 */ + case MIPS_cmp_EQ: /* 02 */ + case MIPS_cmp_UEQ: /* 03 */ + case MIPS_cmp_LT: /* 04 */ + case MIPS_cmp_ULT: /* 05 */ + case MIPS_cmp_LE: /* 06 */ + case MIPS_cmp_ULE: /* 07 */ + case MIPS_cmp_SAF: /* 08 */ + case MIPS_cmp_SUN: /* 09 */ + case MIPS_cmp_SEQ: /* 0a */ + case MIPS_cmp_SUEQ:/* 0b */ + case MIPS_cmp_SLT: /* 0c */ + case MIPS_cmp_SULT:/* 0d */ + case MIPS_cmp_SLE: /* 0e */ + case MIPS_cmp_SULE:/* 0f */ + assert(jit_mips6_p() && + /* 14 */ + (i.rs.b == MIPS_condn_S || + /* 15 */ + i.rs.b == MIPS_condn_D)); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else { + regs[0] = i.ft.b; + regs[1] = i.fs.b; + regs[2] = i.fd.b; + } + goto done; + default: + break; + } + switch (i.rt.b) { + case MIPS_BC: /* 08 */ + assert(!jit_mips6_p() && + /* 00 */ + (i.rs.b == MIPS_BCF || + /* 01 */ + i.rs.b == MIPS_BCT)); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else { + regs[0] = i.rt.b; + regs[1] = i.rd.b; + } + break; + case MIPS_BC1EQZ:/* 09 */ + case MIPS_BC1NEZ:/* 0a */ + assert(jit_mips6_p()); + if (mask & jit_class_gpr) + regs[0] = regs[1] = regs[2] = 0; + else + regs[0] = i.rt.b; + break; + default: + abort(); + } + break; + } + break; + case MIPS_ADDIU: /* 09 */ + case MIPS_SLTI: /* 0a */ + case MIPS_SLTIU: /* 0b */ + case MIPS_ANDI: /* 0c */ + case MIPS_ORI: /* 0d */ + case MIPS_XORI: /* 0e */ + case MIPS_DADDIU: /* 18 */ + case MIPS_LB: /* 20 */ + case MIPS_LH: /* 21 */ + case MIPS_LW: /* 23 */ + case MIPS_LBU: /* 24 */ + case MIPS_LHU: /* 25 */ + case MIPS_LWU: /* 27 */ + case MIPS_SB: /* 28 */ + case MIPS_SH: // 29 */ + case MIPS_SW: /* 2b */ + case MIPS_LD: /* 37 */ + case MIPS_SD: /* 3f */ + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + break; + case MIPS_LL: /* 30 */ + case MIPS_LLD: /* 34 */ + case MIPS_SC: /* 38 */ + case MIPS_SCD: /* 3c */ + assert(!jit_mips6_p() && i.ic.b == 0); + if (mask & jit_class_gpr) { + regs[0] = 
i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + break; + case MIPS_BLEZ: /* 06 */ + case MIPS_BGTZ: /* 07 */ + assert(i.rt.b == 0); + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = regs[2] = 0; + } + break; + case MIPS_BEQ: /* 04 */ + case MIPS_BNE: /* 05 */ + assert(i.rt.b == 0); + case MIPS_LWC1: /* 31 */ + case MIPS_LDC1: /* 35 */ + case MIPS_SWC1: /* 39 */ + case MIPS_SDC1: /* 3d */ + if (mask & jit_class_gpr) { + regs[0] = i.rs.b; + regs[1] = i.rt.b; + regs[2] = 0; + } + else + regs[0] = i.rt.b; + break; + default: + abort(); + } +done: + /* If cannot move instruction do delay slot */ + if (_jitc->inst.pend && + (((mask & jit_class_fpr) || reg0) && + (reg0 == regs[0] || reg0 == regs[1] || reg0 == regs[2])) || + (((mask & jit_class_fpr) || reg1) && + (reg1 == regs[0] || reg1 == regs[1] || reg1 == regs[2]))) { + flush(); + } + /* Get a temporary register */ +retry: + reg = jit_get_reg(mask|jit_class_nospill); + /* Make sure will not use a register in use by delay slot */ + if (_jitc->inst.pend) { + if (rn(reg) == regs[0] || + rn(reg) == regs[1] || rn(reg) == regs[2]) { + r0 = reg; + reg = jit_get_reg(mask|jit_class_nospill); + if (rn(reg) == regs[0] || + rn(reg) == regs[1] || rn(reg) == regs[2]) { + r1 = reg; + reg = jit_get_reg(mask|jit_class_nospill); + if (rn(reg) == regs[0] || + rn(reg) == regs[1] || rn(reg) == regs[2]) { + r2 = reg; + reg = jit_get_reg(mask|jit_class_nospill); + jit_unget_reg(r2); + } + jit_unget_reg(r1); + } + jit_unget_reg(r0); + } + } + if (reg == JIT_NOREG) { + /* Cannot get a register to optimize delay slot */ + flush(); + /* Must find a free register */ + if (!(mask & jit_class_chk)) + goto retry; + } + assert(reg != JIT_NOREG || (mask & jit_class_chk)); + return (reg); +} + static void _hrrrit(jit_state_t *_jit,jit_int32_t hc, jit_int32_t rs, jit_int32_t rt, jit_int32_t rd, @@ -789,7 +1430,7 @@ _hrrrit(jit_state_t *_jit,jit_int32_t hc, i.rt.b = rt; i.rs.b = rs; i.hc.b = hc; - ii(i.op); + instr(i.op); } static void @@ 
-802,7 +1443,21 @@ _hrri(jit_state_t *_jit, jit_int32_t hc, i.rt.b = rt; i.rs.b = rs; i.hc.b = hc; - ii(i.op); + instr(i.op); +} + +static void +_hrri9(jit_state_t *_jit, jit_int32_t hc, + jit_int32_t rs, jit_int32_t rt, jit_int32_t i9, jit_int32_t tc) +{ + jit_instr_t i; + i.op = 0; + i.tc.b = tc; + i.i9.b = i9; + i.rt.b = rt; + i.rs.b = rs; + i.hc.b = hc; + instr(i.op); } static void @@ -811,7 +1466,7 @@ _hi(jit_state_t *_jit, jit_int32_t hc, jit_int32_t im) jit_instr_t i; i.ii.b = im; i.hc.b = hc; - ii(i.op); + instr(i.op); } static void @@ -854,6 +1509,121 @@ _insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, DINS(r0, r1, pos, size); } +/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +/* +unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2 +unsigned int mask = ~0; +while ((s >>= 1) > 0) +{ + mask ^= (mask << s); + v = ((v >> s) & mask) | ((v << s) & ~mask); +} +*/ +static void +_bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1) +{ + jit_int32_t s, mask; + jit_word_t loop, done, t0, t1; + movr(v, r1); + s = jit_get_reg(jit_class_gpr); + movi(rn(s), __WORDSIZE); /* s = sizeof(v) * CHAR_BIT; */ + mask = jit_get_reg(jit_class_gpr); + movi(rn(mask), ~0L); /* mask = ~0; */ + flush(); + loop = _jit->pc.w; /* while ((s >>= 1) > 0) */ + rshi(rn(s), rn(s), 1); /* (s >>= 1) */ + done = blei(_jit->pc.w, rn(s), 0); /* no loop if s <= 0 */ + t0 = jit_get_reg(jit_class_gpr); + lshr(rn(t0), rn(mask), rn(s)); /* t0 = (mask << s) */ + xorr(rn(mask), rn(mask), rn(t0)); /* mask ^= t0 */ + rshr(rn(t0), v, rn(s)); /* t0 = v >> s */ + andr(rn(t0), rn(t0), rn(mask)); /* t0 = t0 & mask */ + t1 = jit_get_reg(jit_class_gpr); + lshr(rn(t1), v, rn(s)); /* t1 = v << s */ + comr(v, rn(mask)); /* v = ~mask */ + andr(rn(t1), v, rn(t1)); /* t1 = t1 & v */ + orr(v, rn(t0), rn(t1)); /* v = t0 | t1 */ + jmpi(loop, 0); + flush(); + patch_at(done, _jit->pc.w); + jit_unget_reg(t1); + jit_unget_reg(t0); + jit_unget_reg(mask); + 
jit_unget_reg(s); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +#if __WORDSIZE == 32 + if (jit_mips6_p()) + CLO_R6(r0, r1); + else + CLO(r0, r1); +#else + if (jit_mips6_p()) + DCLO_R6(r0, r1); + else + DCLO(r0, r1); +#endif +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +#if __WORDSIZE == 32 + if (jit_mips6_p()) + CLZ_R6(r0, r1); + else + CLZ(r0, r1); +#else + if (jit_mips6_p()) + DCLZ_R6(r0, r1); + else + DCLZ(r0, r1); +#endif +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_mips6_p()) { +#if __WORDSIZE == 32 + BITSWAP(r0, r1); + bswapr_ui(r0, r0); + CLO_R6(r0, r0); +#else + DBITSWAP(r0, r1); + bswapr_ul(r0, r0); + DCLO_R6(r0, r0); +#endif + } + else { + bitswap(r0, r1); + clor(r0, r0); + } +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_mips6_p()) { +#if __WORDSIZE == 32 + BITSWAP(r0, r1); + bswapr_ui(r0, r0); + CLZ_R6(r0, r0); +#else + DBITSWAP(r0, r1); + bswapr_ul(r0, r0); + DCLZ_R6(r0, r0); +#endif + } + else { + bitswap(r0, r1); + clzr(r0, r0); + } +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -1048,11 +1818,15 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - if (jit_mips2_p() && __WORDSIZE == 32) - MUL(r0, r1, r2); + if (jit_mips6_p()) + mul_r6(r0, r1, r2); else { - multu(r1, r2); - MFLO(r0); + if (jit_mips2_p() && __WORDSIZE == 32) + MUL(r0, r1, r2); + else { + multu(r1, r2); + MFLO(r0); + } } } @@ -1071,12 +1845,38 @@ static void _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { - if (sign) - mult(r2, r3); - else - multu(r2, r3); - MFLO(r0); - MFHI(r1); + jit_int32_t t0; + if (jit_mips6_p()) { + if (r0 == r2 || r0 == r3) { + t0 = jit_get_reg(jit_class_gpr); + if (sign) + mul_r6(rn(t0), r2, 
r3); + else + mulu_r6(rn(t0), r2, r3); + } + else { + if (sign) + mul_r6(r0, r2, r3); + else + mulu_r6(r0, r2, r3); + } + if (sign) + muh_r6(r1, r2, r3); + else + muhu_r6(r1, r2, r3); + if (r0 == r2 || r0 == r3) { + movr(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { + if (sign) + mult(r2, r3); + else + multu(r2, r3); + MFLO(r0); + MFHI(r1); + } } static void @@ -1093,8 +1893,12 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, static void _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - div(r1, r2); - MFLO(r0); + if (jit_mips6_p()) + div_r6(r0, r1, r2); + else { + div(r1, r2); + MFLO(r0); + } } static void @@ -1110,8 +1914,12 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - divu(r1, r2); - MFLO(r0); + if (jit_mips6_p()) + divu_r6(r0, r1, r2); + else { + divu(r1, r2); + MFLO(r0); + } } static void @@ -1128,12 +1936,39 @@ static void _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_bool_t sign) { - if (sign) - div(r2, r3); - else - divu(r2, r3); - MFLO(r0); - MFHI(r1); + jit_int32_t t0; + if (jit_mips6_p()) { + if (r0 == r2 || r0 == r3) + t0 = jit_get_reg(jit_class_gpr); + else + t0 = _NOREG; + if (sign) { + if (t0 == _NOREG) + div_r6(r0, r2, r3); + else + div_r6(rn(t0), r2, r3); + mod_r6(r1, r2, r3); + } + else { + if (t0 == _NOREG) + divu_r6(r0, r2, r3); + else + divu_r6(rn(t0), r2, r3); + modu_r6(r1, r2, r3); + } + if (t0 != _NOREG) { + movr(r0, rn(t0)); + jit_unget_reg(t0); + } + } + else { + if (sign) + div(r2, r3); + else + divu(r2, r3); + MFLO(r0); + MFHI(r1); + } } static void @@ -1150,8 +1985,12 @@ _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, static void _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - div(r1, r2); - MFHI(r0); + if (jit_mips6_p()) + mod_r6(r0, r1, r2); + else { + div(r1, r2); + MFHI(r0); + } } 
static void @@ -1167,8 +2006,12 @@ _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - divu(r1, r2); - MFHI(r0); + if (jit_mips6_p()) + modu_r6(r0, r1, r2); + else { + divu(r1, r2); + MFHI(r0); + } } static void @@ -1322,7 +2165,7 @@ static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_word_t w; - + flush(); w = _jit->pc.w; # if __WORDSIZE == 32 LUI(r0, i0 >> 16); @@ -1339,6 +2182,36 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) return (w); } +static void +_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_gpr); + SELNEZ(rn(reg), r1, r2); + SELEQZ(r0, r0, r2); + OR(r0, r0, rn(reg)); + jit_unget_reg(reg); + } + else + MOVN(r0, r1, r2); +} + +static void +_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) +{ + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_gpr); + SELEQZ(rn(reg), r1, r2); + SELNEZ(r0, r0, r2); + OR(r0, r0, rn(reg)); + jit_unget_reg(reg); + } + else + MOVZ(r0, r1, r2); +} + static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) @@ -1352,27 +2225,37 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, } SYNC(); /* retry: */ + flush(); retry = _jit->pc.w; # if __WORDSIZE == 32 - LL(r0, 0, r1); + if (jit_mips6_p()) LL_R6(r0, 0, r1); + else LL(r0, 0, r1); # else - LLD(r0, 0, r1); + if (jit_mips6_p()) LLD_R6(r0, 0, r1); + else LLD(r0, 0, r1); # endif + flush(); jump0 = _jit->pc.w; BNE(r0, r2, 1); /* bne done r0 r2 */ movi(r0, 0); /* set to 0 in delay slot */ + flush(); movr(r0, r3); /* after jump and delay slot */ /* store new value */ # if __WORDSIZE == 32 - SC(r0, 0, r1); + if (jit_mips6_p()) SC_R6(r0, 0, r1); + else SC(r0, 0, r1); # else - SCD(r0, 0, r1); + if (jit_mips6_p()) SCD_R6(r0, 0, 
r1); + else SCD(r0, 0, r1); # endif + flush(); jump1 = _jit->pc.w; BEQ(r0, _ZERO_REGNO, 0); /* beqi retry r0 0 */ movi(r0, 1); /* set to 1 in delay slot */ + flush(); SYNC(); /* done: */ + flush(); done = _jit->pc.w; patch_at(jump0, done); patch_at(jump1, retry); @@ -1483,120 +2366,90 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) static void _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - ldr_c(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_c(r0, r0); } static void _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LB(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_c(r0, rn(reg)); - jit_unget_reg(reg); + addi(r0, r1, i0); + ldr_c(r0, r0); } } static void _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - ldr_uc(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_uc(r0, r0); } static void _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LBU(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_uc(r0, rn(reg)); - jit_unget_reg(reg); + addi(r0, r1, i0); + ldr_uc(r0, r0); } } static void _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - ldr_s(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_s(r0, r0); } static void _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LH(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_s(r0, rn(reg)); - jit_unget_reg(reg); + 
addi(r0, r1, i0); + ldr_s(r0, r0); } } static void _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - ldr_us(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_us(r0, r0); } static void _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LHU(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_us(r0, rn(reg)); - jit_unget_reg(reg); + addi(r0, r1, i0); + ldr_us(r0, r0); } } static void _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - ldr_i(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_i(r0, r0); } static void _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LW(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_i(r0, rn(reg)); - jit_unget_reg(reg); + addi(r0, r1, i0); + ldr_i(r0, r0); } } @@ -1604,48 +2457,36 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, r2); - ldr_ui(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_ui(r0, r0); } static void _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LWU(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_ui(r0, rn(reg)); - jit_unget_reg(reg); + addi(r0, r1, i0); + ldr_ui(r0, r0); } } static void _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2) { - jit_int32_t reg; - reg = jit_get_reg(jit_class_gpr); - addr(rn(reg), r1, 
r2); - ldr_l(r0, rn(reg)); - jit_unget_reg(reg); + addr(r0, r1, r2); + ldr_l(r0, r0); } static void _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; if (can_sign_extend_short_p(i0)) LD(r0, i0, r1); else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_l(r0, rn(reg)); - jit_unget_reg(reg); + addi(r0, r1, i0); + ldr_l(r0, r0); } } #endif @@ -1948,8 +2789,7 @@ static void _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { subr(r0, r1, r2); - SLTU(r0, _ZERO_REGNO, r0); - XORI(r0, r0, 1); + SLTIU(r0, r0, 1); } static void @@ -1957,11 +2797,10 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { if (i0) { subi(r0, r1, i0); - SLTU(r0, _ZERO_REGNO, r0); + SLTIU(r0, r0, 1); + } else { + SLTIU(r0, r1, 1); } - else - SLTU(r0, _ZERO_REGNO, r1); - XORI(r0, r0, 1); } static void @@ -2059,173 +2898,19 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } static jit_word_t -_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr); - SLT(rn(reg), r0, r1); - w = _jit->pc.w; - BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLTU(rn(reg), r0, r1); - w = _jit->pc.w; - BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) -{ - jit_word_t w; - jit_word_t d; - jit_int32_t reg; - jit_bool_t zero_p; - - if (!(zero_p = i1 == 0)) - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_sign_extend_short_p(i1)) { - if 
(!zero_p) - SLTI(rn(reg), r0, i1); - w = _jit->pc.w; - d = ((i0 - w) >> 2) - 1; - if (!zero_p) - BNE(rn(reg), _ZERO_REGNO, d); - else - BLTZ(r0, d); - NOP(1); - } - else { - movi(rn(reg), i1); - w = bltr(i0, r0, rn(reg)); - } - if (!zero_p) - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_sign_extend_short_p(i1)) { - SLTIU(rn(reg), r0, i1); - w = _jit->pc.w; - BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - movi(rn(reg), i1); - w = bltr_u(i0, r0, rn(reg)); - } - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLT(rn(reg), r1, r0); - w = _jit->pc.w; - BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLTU(rn(reg), r1, r0); - w = _jit->pc.w; - BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) -{ - jit_word_t w; - jit_int32_t reg; - - if (i1 == 0) { - w = _jit->pc.w; - BLEZ(r0, ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - w = bler(i0, r0, rn(reg)); - jit_unget_reg(reg); - } - - return (w); -} - -static jit_word_t -_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) -{ - jit_word_t w; - jit_int32_t reg; - - if (i1 == 0) { - w = _jit->pc.w; - BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - 
reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - w = bler_u(i0, r0, rn(reg)); - jit_unget_reg(reg); - } - - return (w); -} - -static jit_word_t -_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) -{ - jit_word_t w; - + jit_int32_t op, reg; + /* Just to not move incorrectly instruction to delay slot */ + reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1); + op = pending(); + /* implicit flush() */ w = _jit->pc.w; BEQ(r0, r1, ((i0 - w) >> 2) - 1); - NOP(1); - + delay(op); + if (reg != JIT_NOREG) + jit_unget_reg(reg); return (w); } @@ -2233,179 +2918,168 @@ static jit_word_t _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { jit_word_t w; - jit_int32_t reg; - - if (i1 == 0) { - w = _jit->pc.w; - BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - } + jit_int32_t op, reg; + if (i1 == 0) + w = beqr(i0, r0, _ZERO_REGNO); else { - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO); + op = pending(); movi(rn(reg), i1); - w = beqr(i0, r0, rn(reg)); + flush(); + w = _jit->pc.w; + BEQ(r0, rn(reg), ((i0 - w) >> 2) - 1); + delay(op); jit_unget_reg(reg); } - return (w); } static jit_word_t -_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1, + jit_bool_t sltu) { jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLT(rn(reg), r0, r1); - w = _jit->pc.w; - BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLTU(rn(reg), r0, r1); + jit_int32_t op, reg; + reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1); + op = pending(); + if (sltu) + SLTU(rn(reg), r0, 
r1); + else + SLT(rn(reg), r0, r1); + flush(); w = _jit->pc.w; BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); + delay(op); jit_unget_reg(reg); - return (w); } static jit_word_t -_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1, + jit_bool_t sltiu, jit_bool_t bne) { jit_word_t w; jit_word_t d; - jit_int32_t reg; jit_bool_t zero_p; - - if (!(zero_p = i1 == 0)) - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + jit_int32_t op, t0, mask; + zero_p = !sltiu && i1 == 0; + /* Even if zero_p allocate one as a mean to avoid incorrect delay slot */ + mask = jit_class_gpr; + if (zero_p) + mask |= jit_class_chk; + t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO); if (can_sign_extend_short_p(i1)) { - if (!zero_p) - SLTI(rn(reg), r0, i1); + op = pending(); + if (!zero_p) { + if (sltiu) + SLTIU(rn(t0), r0, i1); + else + SLTI(rn(t0), r0, i1); + } + flush(); w = _jit->pc.w; d = ((i0 - w) >> 2) - 1; - if (!zero_p) - BEQ(rn(reg), _ZERO_REGNO, d); - else - BGEZ(r0, d); - NOP(1); + if (bne) { + if (!zero_p) + BNE(rn(t0), _ZERO_REGNO, d); + else + BLTZ(r0, d); + } + else { + if (!zero_p) + BEQ(rn(t0), _ZERO_REGNO, d); + else + BGEZ(r0, d); + } } else { - movi(rn(reg), i1); - w = bger(i0, r0, rn(reg)); - } - if (!zero_p) - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - if (can_sign_extend_short_p(i1)) { - SLTIU(rn(reg), r0, i1); + op = pending(); + movi(rn(t0), i1); + if (sltiu) + SLTU(rn(t0), r0, rn(t0)); + else + SLT(rn(t0), r0, rn(t0)); + flush(); w = _jit->pc.w; - BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - movi(rn(reg), i1); - w = bger_u(i0, r0, rn(reg)); + if (bne) + BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1); + else + BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) 
- 1); } - jit_unget_reg(reg); - - return (w); -} - -static jit_word_t -_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) -{ - jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLT(rn(reg), r1, r0); - w = _jit->pc.w; - BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - jit_unget_reg(reg); - + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); return (w); } static jit_word_t -_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) +_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1, + jit_bool_t sltu, jit_bool_t inv) { jit_word_t w; - jit_int32_t reg; - - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - SLTU(rn(reg), r1, r0); + jit_int32_t op, reg; + reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1); + op = pending(); + if (sltu) + SLTU(rn(reg), r1, r0); + else + SLT(rn(reg), r1, r0); + flush(); w = _jit->pc.w; - BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); + if (inv) + BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); + else + BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1); + delay(op); jit_unget_reg(reg); - return (w); } static jit_word_t -_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) +_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1, + jit_bool_t sltiu, jit_bool_t inv) { jit_word_t w; - jit_int32_t reg; - + jit_int32_t op, t0, mask; + mask = jit_class_gpr; + if (i0 == 0) + mask |= jit_class_chk; + /* Allocate even if i0 == 0 as a way to avoid incorrect delay slot */ + t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO); if (i1 == 0) { + op = pending(); + /* implicit flush() */ w = _jit->pc.w; - BGTZ(r0, ((i0 - w) >> 2) - 1); - NOP(1); + if (inv) { + if (sltiu) + BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1); + else + BLEZ(r0, ((i0 - w) >> 2) - 1); + } + else { + if (sltiu) + BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1); + else + BGTZ(r0, ((i0 - w) >> 2) - 1); + } } else { - reg = 
jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - w = bgtr(i0, r0, rn(reg)); - jit_unget_reg(reg); - } - - return (w); -} - -static jit_word_t -_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) -{ - jit_word_t w; - jit_int32_t reg; - - if (i1 == 0) { + op = pending(); + movi(rn(t0), i1); + if (sltiu) + SLTU(rn(t0), rn(t0), r0); + else + SLT(rn(t0), rn(t0), r0); + flush(); w = _jit->pc.w; - BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - } - else { - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi(rn(reg), i1); - w = bgtr_u(i0, r0, rn(reg)); - jit_unget_reg(reg); + if (inv) + BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1); + else + BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1); } - + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); return (w); } @@ -2413,11 +3087,16 @@ static jit_word_t _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; - + jit_int32_t op, reg; + /* Just to not move incorrectly instruction to delay slot */ + reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1); + op = pending(); + /* implicit flush() */ w = _jit->pc.w; BNE(r0, r1, ((i0 - w) >> 2) - 1); - NOP(1); - + delay(op); + if (reg != JIT_NOREG) + jit_unget_reg(reg); return (w); } @@ -2425,48 +3104,85 @@ static jit_word_t _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { jit_word_t w; - jit_int32_t reg; - - if (i1 == 0) { - w = _jit->pc.w; - BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1); - NOP(1); - } + jit_int32_t op, reg; + if (i1 == 0) + w = bner(i0, r0, _ZERO_REGNO); else { - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); + reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO); + op = pending(); movi(rn(reg), i1); - w = bner(i0, r0, rn(reg)); + flush(); + w = _jit->pc.w; + BNE(r0, rn(reg), ((i0 - w) >> 2) - 1); + delay(op); jit_unget_reg(reg); } - return (w); } static void _jmpr(jit_state_t *_jit, jit_int32_t r0) { + jit_int32_t op, t0; + 
/* make sure delay slot does not use r0 */ + t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, + r0, _ZERO_REGNO); + op = pending(); JR(r0); - NOP(1); + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); } static jit_word_t -_jmpi(jit_state_t *_jit, jit_word_t i0) -{ - jit_word_t w; - jit_int32_t reg; - +_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch) +{ + jit_int32_t op, t0; + jit_word_t w, disp; + /* try to get a pending instruction before the jump */ + t0 = jit_get_reg_for_delay_slot(jit_class_gpr, _ZERO_REGNO, _ZERO_REGNO); + op = pending(); + /* implicit flush() */ w = _jit->pc.w; - if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) { - J((i0 & ~0xf0000000) >> 2); - NOP(1); + if (jit_mips2_p()) { + disp = ((i0 - w) >> 2) - 1; + if (patch || can_sign_extend_short_p(disp)) { + BEQ(_ZERO_REGNO, _ZERO_REGNO, disp); + goto done; + } } + if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) + J((i0 & ~0xf0000000) >> 2); else { - reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - movi_p(rn(reg), i0); - jmpr(rn(reg)); - jit_unget_reg(reg); + if (patch) + w = movi_p(rn(t0), i0); + else + movi(rn(t0), i0); + JR(rn(t0)); } +done: + delay(op); + jit_unget_reg(t0); + return (w); +} +static jit_word_t +_jmpi_p(jit_state_t *_jit, jit_word_t i0) +{ + jit_word_t w; + jit_int32_t op, t0; + /* make sure delay slot does not use _T9_REGNO */ + t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, + _T9_REGNO, _ZERO_REGNO); + op = pending(); + /* implicit flush() */ + w = _jit->pc.w; + movi_p(rn(t0), i0); + flush(); /* movi_p will be patched */ + JR(rn(t0)); + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); return (w); } @@ -2486,11 +3202,14 @@ _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) addr(rn(t1), r0, r1); /* t1 = r0 + r1 */ SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */ SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */ - MOVZ(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */ + 
movzr(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */ + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ addr(r0, r0, r1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2514,11 +3233,14 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) addiu(rn(t1), r0, i1); SLT(rn(t2), r0, rn(t1)); SLT(rn(t1), rn(t1), r0); - MOVZ(rn(t1), rn(t2), rn(t0)); + movzr(rn(t1), rn(t2), rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ addiu(r0, r0, i1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2543,10 +3265,13 @@ _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); addr(rn(t0), r0, r1); SLTU(rn(t1), rn(t0), r0); + flush(); + /* cannot optimize delay slot */ w = _jit->pc.w; BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); return (w); @@ -2564,10 +3289,13 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); addiu(rn(t0), r0, i1); SLTU(rn(t1), rn(t0), r0); + flush(); + /* cannot optimize delay slot */ w = _jit->pc.w; BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); } @@ -2596,11 +3324,14 @@ _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) addr(rn(t1), r0, r1); /* t1 = r0 + r1 */ SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */ SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */ - MOVZ(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */ + movzr(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */ + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ addr(r0, 
r0, r1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2624,11 +3355,14 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) addiu(rn(t1), r0, i1); SLT(rn(t2), r0, rn(t1)); SLT(rn(t1), rn(t1), r0); - MOVZ(rn(t1), rn(t2), rn(t0)); + movzr(rn(t1), rn(t2), rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ addiu(r0, r0, i1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2653,10 +3387,13 @@ _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); addr(rn(t0), r0, r1); SLTU(rn(t1), rn(t0), r0); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); return (w); @@ -2674,10 +3411,13 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); addiu(rn(t0), r0, i1); SLTU(rn(t1), rn(t0), r0); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); } @@ -2706,11 +3446,13 @@ _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) subr(rn(t1), r0, r1); /* t1 = r0 - r1 */ SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */ SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */ - MOVZ(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */ + movzr(rn(t1), rn(t2), rn(t0)); /* if (r0 == 0) t1 = t2 */ + flush(); w = _jit->pc.w; BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ subr(r0, r0, r1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2734,11 +3476,13 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) addiu(rn(t1), r0, -i1); SLT(rn(t2), rn(t1), r0); 
SLT(rn(t1), r0, rn(t1)); - MOVZ(rn(t1), rn(t2), rn(t0)); + movzr(rn(t1), rn(t2), rn(t0)); + flush(); w = _jit->pc.w; BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ addiu(r0, r0, -i1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2763,10 +3507,13 @@ _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); subr(rn(t0), r0, r1); SLTU(rn(t1), r0, rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); return (w); @@ -2784,10 +3531,13 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); addiu(rn(t0), r0, -i1); SLTU(rn(t1), r0, rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); } @@ -2816,11 +3566,14 @@ _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) subr(rn(t1), r0, r1); /* t1 = r0 - r1 */ SLT(rn(t2), rn(t1), r0); /* t2 = t1 < r0 */ SLT(rn(t1), r0, rn(t1)); /* t1 = r0 < t1 */ - MOVZ(rn(t1), rn(t2), rn(t0)); /* if (t0 == 0) t1 = t2 */ + movzr(rn(t1), rn(t2), rn(t0)); /* if (t0 == 0) t1 = t2 */ + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ subr(r0, r0, r1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2844,11 +3597,14 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) addiu(rn(t1), r0, -i1); SLT(rn(t2), rn(t1), r0); SLT(rn(t1), r0, rn(t1)); - MOVZ(rn(t1), rn(t2), rn(t0)); + movzr(rn(t1), rn(t2), rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1); /* delay slot */ addiu(r0, 
r0, -i1); + flush(); jit_unget_reg(t2); jit_unget_reg(t1); jit_unget_reg(t0); @@ -2873,10 +3629,13 @@ _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); subr(rn(t0), r0, r1); SLTU(rn(t1), r0, rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); return (w); @@ -2894,10 +3653,13 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) t1 = jit_get_reg(jit_class_gpr|jit_class_nospill); addiu(rn(t0), r0, -i1); SLTU(rn(t1), r0, rn(t0)); + /* cannot optimize delay slot */ + flush(); w = _jit->pc.w; BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1); /* delay slot */ movr(r0, rn(t0)); + flush(); jit_unget_reg(t1); jit_unget_reg(t0); } @@ -2914,12 +3676,14 @@ static jit_word_t _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); + jit_int32_t op, t0; + t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1); + op = pending(); AND(rn(t0), r0, r1); + flush(); w = _jit->pc.w; BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); + delay(op); jit_unget_reg(t0); return (w); } @@ -2928,14 +3692,14 @@ static jit_word_t _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { jit_word_t w; - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); - + jit_int32_t op, t0; + t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO); + op = pending(); andi(rn(t0), r0, i1); + flush(); w = _jit->pc.w; BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); - + delay(op); jit_unget_reg(t0); return (w); } @@ -2944,12 +3708,14 @@ static jit_word_t _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t w; - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); + 
jit_int32_t op, t0; + t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1); + op = pending(); AND(rn(t0), r0, r1); + flush(); w = _jit->pc.w; BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); + delay(op); jit_unget_reg(t0); return (w); } @@ -2958,14 +3724,14 @@ static jit_word_t _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { jit_word_t w; - jit_int32_t t0; - t0 = jit_get_reg(jit_class_gpr|jit_class_nospill); - + jit_int32_t op, t0; + t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO); + op = pending(); andi(rn(t0), r0, i1); + flush(); w = _jit->pc.w; BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1); - NOP(1); - + delay(op); jit_unget_reg(t0); return (w); } @@ -2973,78 +3739,112 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) static void _callr(jit_state_t *_jit, jit_int32_t r0) { - JALR(r0); - if (r0 != _T9_REGNO) - movr(_T9_REGNO, r0); - else - NOP(1); + jit_int32_t op, t0; + if (r0 != _T9_REGNO) { + JALR(r0); + /* delay slot */ + movr(_T9_REGNO, r0); + flush(); + } + else { + /* make sure delay slot does not use r0 */ + t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, + r0, _ZERO_REGNO); + op = pending(); + JALR(r0); + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); + } } -static void -_calli(jit_state_t *_jit, jit_word_t i0) +static jit_word_t +_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch) { - if (((_jit->pc.w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) { - if (can_sign_extend_short_p(i0)) { - JAL((i0 & ~0xf0000000) >> 2); - addiu(_T9_REGNO, _ZERO_REGNO, i0); - return; - } - - if (can_zero_extend_short_p(i0)) { - JAL((i0 & ~0xf0000000) >> 2); - ORI(_T9_REGNO, _ZERO_REGNO, i0); - return; + jit_int32_t op, t0; + jit_word_t w, disp; + w = _jit->pc.w; + if (jit_mips2_p()) { + disp = ((i0 - w) >> 2) - 1; + if (patch || can_sign_extend_short_p(disp)) { + op = pending(); + BGEZAL(_ZERO_REGNO, disp); /* Renamed to BAL in mips release 6 */ + delay(op); + 
goto done; + } + } + assert(!patch); + flush(); + if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) { + if (can_sign_extend_short_p(i0)) { + JAL((i0 & ~0xf0000000) >> 2); + /* delay slot */ + addiu(_T9_REGNO, _ZERO_REGNO, i0); + } + else if (can_zero_extend_short_p(i0)) { + JAL((i0 & ~0xf0000000) >> 2); + /* delay slot */ + ORI(_T9_REGNO, _ZERO_REGNO, i0); } - - if (can_sign_extend_int_p(i0)) { - if (i0 & 0xffff) { - LUI(_T9_REGNO, i0 >> 16); - JAL((i0 & ~0xf0000000) >> 2); - ORI(_T9_REGNO, _T9_REGNO, i0); - } else { - JAL((i0 & ~0xf0000000) >> 2); - LUI(_T9_REGNO, i0 >> 16); + else if (can_sign_extend_int_p(i0)) { + if (i0 & 0xffff) { + LUI(_T9_REGNO, i0 >> 16); + JAL((i0 & ~0xf0000000) >> 2); + /* delay slot */ + ORI(_T9_REGNO, _T9_REGNO, i0); } - return; + else { + JAL((i0 & ~0xf0000000) >> 2); + /* delay slot */ + LUI(_T9_REGNO, i0 >> 16); + } } + else + goto fallback; } - - movi(_T9_REGNO, i0); - JALR(_T9_REGNO); - NOP(1); + else { + fallback: + /* make sure delay slot does not use _T9_REGNO */ + t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, + _T9_REGNO, _ZERO_REGNO); + /* try to get an instruction before the call */ + op = pending(); + movi(_T9_REGNO, i0); + JALR(_T9_REGNO); + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); + } + done: + return (w); } static jit_word_t _calli_p(jit_state_t *_jit, jit_word_t i0) { jit_word_t word; - + jit_int32_t op, t0; + /* make sure delay slot does not use _T9_REGNO */ + t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, + _T9_REGNO, _ZERO_REGNO); + op = pending(); + /* implicit flush() */ word = _jit->pc.w; movi_p(_T9_REGNO, i0); JALR(_T9_REGNO); - NOP(1); - + delay(op); + if (t0 != JIT_NOREG) + jit_unget_reg(t0); return (word); } -static jit_int32_t fregs[] = { - _F30, _F28, _F26, _F24, _F22, _F20, -#if !NEW_ABI - _F18, _F16, -#endif -}; - -static jit_int32_t iregs[] = { - _S7, _S6, _S5, _S4, _S3, _S2, _S1, _S0, -}; - static void _prolog(jit_state_t *_jit, jit_node_t *node) 
{ - jit_int32_t index; - jit_int32_t offset; + jit_int32_t reg, offs; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); if (_jitc->function->assume_frame) return; @@ -3063,51 +3863,65 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* align stack at 8 bytes */ _jitc->function->self.aoff) + 7) & -8; #endif - /* callee save registers */ + #if NEW_ABI - if ((_jitc->function->self.call & jit_call_varargs) && - jit_arg_reg_p(_jitc->function->vagp)) - subi(_SP_REGNO, _SP_REGNO, stack_framesize + 64); - else + if (_jitc->function->stack) + _jitc->function->need_stack = 1; + if (!_jitc->function->need_frame && !_jitc->function->need_stack) { + /* check if any callee save register needs to be saved */ + for (reg = 0; reg < _jitc->reglen; ++reg) + if (jit_regset_tstbit(&_jitc->function->regset, reg) && + (_rvs[reg].spec & jit_class_sav)) { + _jitc->function->need_stack = 1; + break; + } + } +#else + /* Need always a frame due to the need to always allocate 16 bytes */ + jit_check_frame(); #endif - subi(_SP_REGNO, _SP_REGNO, stack_framesize); - offset = stack_framesize - (sizeof(jit_word_t) << 1); - for (index = 0; index < jit_size(fregs); index++, offset -= 8) { - if (jit_regset_tstbit(&_jitc->function->regset, fregs[index])) - stxi_d(offset, _SP_REGNO, rn(fregs[index])); - } - for (index = 0; index < jit_size(iregs); - index++, offset -= sizeof(jit_word_t)) { - if (jit_regset_tstbit(&_jitc->function->regset, iregs[index])) - stxi(offset, _SP_REGNO, rn(iregs[index])); - } - assert(offset >= sizeof(jit_word_t)); - stxi(offset, _SP_REGNO, _RA_REGNO); - stxi(0, _SP_REGNO, _BP_REGNO); - movr(_BP_REGNO, _SP_REGNO); + + if (_jitc->function->need_frame || _jitc->function->need_stack) + subi(_SP_REGNO, _SP_REGNO, jit_framesize()); + if (_jitc->function->need_frame) { + stxi(0, _SP_REGNO, _RA_REGNO); + stxi(STACK_SLOT, _SP_REGNO, _BP_REGNO); + } + /* 
callee save registers */ + for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + stxi(offs, _SP_REGNO, rn(iregs[reg])); + offs += STACK_SLOT; + } + } + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + stxi_d(offs, _SP_REGNO, rn(fregs[reg])); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->need_frame) + movr(_BP_REGNO, _SP_REGNO); /* alloca */ if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); if (_jitc->function->allocar) { - index = jit_get_reg(jit_class_gpr); - movi(rn(index), _jitc->function->self.aoff); - stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index)); - jit_unget_reg(index); + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), _jitc->function->self.aoff); + stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(reg)); + jit_unget_reg(reg); } if (_jitc->function->self.call & jit_call_varargs) { + for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) { + offs = jit_framesize() - ((NUM_WORD_ARGS - reg) * STACK_SLOT); #if NEW_ABI - index = _jitc->function->vagp; + SD(rn(_A0 - reg), offs, _BP_REGNO); #else - index = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT; -#endif - offset = stack_framesize + index * STACK_SLOT; - for (; jit_arg_reg_p(index); ++index, offset += STACK_SLOT) { -#if NEW_ABI - SD(rn(_A0 - index), offset, _BP_REGNO); -#else - stxi(offset + WORD_ADJUST, _BP_REGNO, rn(_A0 - index)); + offs += 16 + WORD_ADJUST; + stxi(offs, _BP_REGNO, rn(_A0 - reg)); #endif } } @@ -3116,48 +3930,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { - jit_int32_t index; - jit_int32_t offset; + jit_int32_t reg, offs; if (_jitc->function->assume_frame) return; + + if (_jitc->function->need_frame) { + movr(_SP_REGNO, _BP_REGNO); + ldxi(_RA_REGNO, _SP_REGNO, 0); + ldxi(_BP_REGNO, _SP_REGNO, STACK_SLOT); + 
} + /* callee save registers */ - movr(_SP_REGNO, _BP_REGNO); - offset = stack_framesize - (sizeof(jit_word_t) << 1); - for (index = 0; index < jit_size(fregs); index++, offset -= 8) { - if (jit_regset_tstbit(&_jitc->function->regset, fregs[index])) - ldxi_d(rn(fregs[index]), _SP_REGNO, offset); - } - for (index = 0; index < jit_size(iregs); - index++, offset -= sizeof(jit_word_t)) { - if (jit_regset_tstbit(&_jitc->function->regset, iregs[index])) - ldxi(rn(iregs[index]), _SP_REGNO, offset); - } - assert(offset >= sizeof(jit_word_t)); - ldxi(_RA_REGNO, _SP_REGNO, offset); - ldxi(_BP_REGNO, _SP_REGNO, 0); + for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + ldxi(rn(iregs[reg]), _SP_REGNO, offs); + offs += sizeof(jit_word_t); + } + } + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + ldxi_d(rn(fregs[reg]), _SP_REGNO, offs); + offs += sizeof(jit_float64_t); + } + } JR(_RA_REGNO); /* delay slot */ -#if NEW_ABI - if ((_jitc->function->self.call & jit_call_varargs) && - jit_arg_reg_p(_jitc->function->vagp)) - addi(_SP_REGNO, _SP_REGNO, stack_framesize + 64); + if (_jitc->function->need_frame || _jitc->function->need_stack) + addi(_SP_REGNO, _SP_REGNO, jit_framesize()); else -#endif - addi(_SP_REGNO, _SP_REGNO, stack_framesize); + NOP(1); + flush(); } static void _vastart(jit_state_t *_jit, jit_int32_t r0) { assert(_jitc->function->self.call & jit_call_varargs); - /* Initialize va_list to the first stack argument. */ #if NEW_ABI + /* Initialize va_list to the first stack argument. 
*/ if (jit_arg_reg_p(_jitc->function->vagp)) - addi(r0, _BP_REGNO, stack_framesize + _jitc->function->vagp * - sizeof(jit_int64_t)); + addi(r0, _BP_REGNO, + jit_framesize() - + ((NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT)); else #endif - addi(r0, _BP_REGNO, _jitc->function->self.size); + addi(r0, _BP_REGNO, jit_selfsize()); } static void @@ -3247,16 +4064,31 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) break; case MIPS_COP1: case MIPS_COP2: - assert(i.rs.b == MIPS_BC); - switch (i.rt.b) { - case MIPS_BCF: case MIPS_BCFL: - case MIPS_BCT: case MIPS_BCTL: - i.is.b = ((label - instr) >> 2) - 1; - u.i[0] = i.op; - break; - default: - assert(!"unhandled branch opcode"); - break; + if (jit_mips6_p()) { + switch (i.rs.b) { + case MIPS_BC1EQZ: case MIPS_BC1NEZ: + assert(jit_mips6_p()); + i.is.b = ((label - instr) >> 2) - 1; + u.i[0] = i.op; + break; + default: + assert(!"unhandled branch opcode"); + break; + } + } + else { + assert(i.rs.b == MIPS_BC); + switch (i.rt.b) { + case MIPS_BCF: case MIPS_BCFL: + case MIPS_BCT: case MIPS_BCTL: + assert(!jit_mips6_p()); + i.is.b = ((label - instr) >> 2) - 1; + u.i[0] = i.op; + break; + default: + assert(!"unhandled branch opcode"); + break; + } } break; diff --git a/deps/lightning/lib/jit_mips-fpu.c b/deps/lightning/lib/jit_mips-fpu.c index 6209fd68..8e3df869 100644 --- a/deps/lightning/lib/jit_mips-fpu.c +++ b/deps/lightning/lib/jit_mips-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -27,6 +27,8 @@ # define MIPS_fmt_PS 0x16 /* 2 x float32 */ # define MIPS_fmt_S_PU 0x20 # define MIPS_fmt_S_PL 0x26 +# define MIPS_condn_S 0x14 /* release 6 */ +# define MIPS_condn_D 0x15 /* release 6 */ # define MIPS_ADD_fmt 0x00 # define MIPS_LWXC1 0x00 # define MIPS_SUB_fmt 0x01 @@ -88,6 +90,23 @@ # define MIPS_cond_NGE 0x3d # define MIPS_cond_LE 0x3e # define MIPS_cond_UGT 0x3f +/* Mips release 6 */ +# define MIPS_cmp_AF 0x00 +# define MIPS_cmp_UN 0x01 +# define MIPS_cmp_EQ 0x02 +# define MIPS_cmp_UEQ 0x03 +# define MIPS_cmp_LT 0x04 +# define MIPS_cmp_ULT 0x05 +# define MIPS_cmp_LE 0x06 +# define MIPS_cmp_ULE 0x07 +# define MIPS_cmp_SAF 0x08 +# define MIPS_cmp_SUN 0x09 +# define MIPS_cmp_SEQ 0x0a +# define MIPS_cmp_SUEQ 0x0b +# define MIPS_cmp_SLT 0x0c +# define MIPS_cmp_SULT 0x0d +# define MIPS_cmp_SLE 0x0e +# define MIPS_cmp_SULE 0x0f # define ADD_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt) # define ADD_D(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt) # define SUB_S(fd,fs,ft) hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt) @@ -103,7 +122,9 @@ # define SQRT_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt) # define SQRT_D(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt) # define MFC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0) +# define MFHC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MFH,rt,fs,0,0) # define MTC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0) +# define MTHC1(rt, fs) hrrrit(MIPS_COP1,MIPS_MTH,rt,fs,0,0) # define DMFC1(rt, fs) hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0) # define DMTC1(rt, fs) hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0) # define CVT_D_S(fd,fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D) @@ -130,7 +151,9 @@ # define MOV_S(fd, fs) hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt) # define MOV_D(fd, fs) hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt) # define BC1F(im) hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im) +# define BC1EQZ(ft,im) hrri(MIPS_COP1,MIPS_BC1EQZ,ft,im) # define BC1T(im) 
hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im) +# define BC1NEZ(ft,im) hrri(MIPS_COP1,MIPS_BC1NEZ,ft,im) # define C_F_S(fs,ft) c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F) # define C_F_D(fs,ft) c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F) # define C_F_PS(fs,ft) c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F) @@ -183,6 +206,42 @@ static void _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t ft, jit_int32_t fs, jit_int32_t cc); +# define CMP_AF_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_AF) +# define CMP_AF_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_AF) +# define CMP_UN_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UN) +# define CMP_UN_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UN) +# define CMP_EQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_EQ) +# define CMP_EQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_EQ) +# define CMP_UEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UEQ) +# define CMP_UEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UEQ) +# define CMP_LT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LT) +# define CMP_LT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LT) +# define CMP_ULT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULT) +# define CMP_ULT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULT) +# define CMP_LE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LE) +# define CMP_LE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LE) +# define CMP_ULE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULE) +# define CMP_ULE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULE) +# define CMP_SAF_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SAF) +# define CMP_SAF_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SAF) +# define CMP_SUN_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUN) +# define CMP_SUN_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUN) +# define 
CMP_SEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SEQ) +# define CMP_SEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SEQ) +# define CMP_SUEQ_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUEQ) +# define CMP_SUEQ_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUEQ) +# define CMP_SLT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLT) +# define CMP_SLT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLT) +# define CMP_SULT_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULT) +# define CMP_SULT_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULT) +# define CMP_SLE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLE) +# define CMP_SLE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLE) +# define CMP_SULE_S(fd,fs,ft) cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULE) +# define CMP_SULE_D(fd,fs,ft) cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULE) +# define cmp_cond_fmt(fm,fd,ft,fs,cn) _cmp_cond_fmt(_jit,fm,fd,ft,fs,cn) +static void +_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd, + jit_int32_t ft, jit_int32_t fs, jit_int32_t cn); # define addr_f(r0,r1,r2) ADD_S(r0,r1,r2) # define addi_f(r0,r1,i0) _addi_f(_jit,r0,r1,i0) static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*); @@ -220,7 +279,7 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*); # define sqrtr_f(r0,r1) SQRT_S(r0,r1) # define sqrtr_d(r0,r1) SQRT_D(r0,r1) # define movr_w_f(r0, r1) MTC1(r1, r0) -# define movr_f_w(r0, r1) MFC1(r1, r0) +# define movr_f_w(r0, r1) MFC1(r0, r1) # define movi_f_w(r0, i0) _movi_f_w(_jit, r0, i0) static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*); # define extr_f(r0, r1) _extr_f(_jit, r0, r1) @@ -565,7 +624,22 @@ _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm, i.ft.b = ft; i.fm.b = fm; i.hc.b = MIPS_COP1; - ii(i.op); + instr(i.op); +} + +static void +_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd, + 
jit_int32_t ft, jit_int32_t fs, jit_int32_t cn) +{ + jit_instr_t i; + i.op = 0; /* must have bit 6 zero ed */ + i.cn.b = cn; + i.ft.b = ft; + i.fs.b = fs; + i.fd.b = fd; + i.fm.b = fm; + i.hc.b = MIPS_COP1; + instr(i.op); } # define fpr_opi(name, type, size) \ @@ -829,16 +903,28 @@ static void _movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { assert(r1 == r2 - 1); - MTC1(r1, r0 + BE_P); - MTC1(r2, r0 + LE_P); + if (jit_mips6_p()) { + MTC1(r1, r0); + MTHC1(r2, r0); + } + else { + MTC1(r1, r0 + BE_P); + MTC1(r2, r0 + LE_P); + } } static void _movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { assert(r0 == r1 - 1); - MFC1(r0, r2 + BE_P); - MFC1(r1, r2 + LE_P); + if (jit_mips6_p()) { + MFC1(r0, r2); + MFHC1(r1, r2); + } + else { + MFC1(r0, r2 + BE_P); + MFC1(r1, r2 + LE_P); + } } static void @@ -896,40 +982,40 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { -# if __WORDSIZE == 64 || NEW_ABI - LDC1(r0, 0, r1); -# else - LWC1(r0 + BE_P, 0, r1); - LWC1(r0 + LE_P, 4, r1); -# endif + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) + LDC1(r0, 0, r1); + else { + LWC1(r0 + BE_P, 0, r1); + LWC1(r0 + LE_P, 4, r1); + } } static void _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - LDC1(r0, i0, _ZERO_REGNO); - else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - LDC1(r0, 0, rn(reg)); - jit_unget_reg(reg); - } -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - LWC1(r0 + BE_P, i0, _ZERO_REGNO); - LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + LDC1(r0, i0, _ZERO_REGNO); + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + LDC1(r0, 0, rn(reg)); + jit_unget_reg(reg); + } } else { - reg = 
jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - LWC1(r0 + BE_P, 0, rn(reg)); - LWC1(r0 + LE_P, 4, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + LWC1(r0 + BE_P, i0, _ZERO_REGNO); + LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + } + else { + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + ldr_d(r0, rn(reg)); + jit_unget_reg(reg); + } } -# endif } static void @@ -946,52 +1032,60 @@ static void _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - LDC1(r0, i0, r1); -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - LWC1(r0 + BE_P, i0, r1); - LWC1(r0 + LE_P, i0 + 4, r1); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + LDC1(r0, i0, r1); + else + goto fallback; } -# endif else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r1, i0); - ldr_d(r0, rn(reg)); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + LWC1(r0 + BE_P, i0, r1); + LWC1(r0 + LE_P, i0 + 4, r1); + } + else { + fallback: + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r1, i0); + ldr_d(r0, rn(reg)); + jit_unget_reg(reg); + } } } static void _str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1) { -# if __WORDSIZE == 64 || NEW_ABI - SDC1(r1, 0, r0); -# else - SWC1(r1 + BE_P, 0, r0); - SWC1(r1 + LE_P, 4, r0); -# endif + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) + SDC1(r1, 0, r0); + else { + SWC1(r1 + BE_P, 0, r0); + SWC1(r1 + LE_P, 4, r0); + } } static void _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - SDC1(r0, i0, _ZERO_REGNO); -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - SWC1(r0 + BE_P, i0, _ZERO_REGNO); - SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + if (jit_mips6_p() || 
__WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + SDC1(r0, i0, _ZERO_REGNO); + else + goto fallback; } -# endif else { - reg = jit_get_reg(jit_class_gpr); - movi(rn(reg), i0); - str_d(rn(reg), r0); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + SWC1(r0 + BE_P, i0, _ZERO_REGNO); + SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO); + } + else { + fallback: + reg = jit_get_reg(jit_class_gpr); + movi(rn(reg), i0); + str_d(rn(reg), r0); + jit_unget_reg(reg); + } } } @@ -1009,20 +1103,24 @@ static void _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_int32_t reg; -# if __WORDSIZE == 64 || NEW_ABI - if (can_sign_extend_short_p(i0)) - SDC1(r1, i0, r0); -# else - if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { - SWC1(r1 + BE_P, i0, r0); - SWC1(r1 + LE_P, i0 + 4, r0); + if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) { + if (can_sign_extend_short_p(i0)) + SDC1(r1, i0, r0); + else + goto fallback; } -# endif else { - reg = jit_get_reg(jit_class_gpr); - addi(rn(reg), r0, i0); - str_d(rn(reg), r1); - jit_unget_reg(reg); + if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) { + SWC1(r1 + BE_P, i0, r0); + SWC1(r1 + LE_P, i0 + 4, r0); + } + else { + fallback: + reg = jit_get_reg(jit_class_gpr); + addi(rn(reg), r0, i0); + str_d(rn(reg), r1); + jit_unget_reg(reg); + } } } @@ -1058,30 +1156,49 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) else DMTC1(_ZERO_REGNO, r0); # else - if (_jitc->no_data) - reg = jit_get_reg(jit_class_gpr); - if (data.i[0]) { + if (jit_mips6_p()) { if (_jitc->no_data) { - movi(rn(reg), data.i[0]); - MTC1(rn(reg), r0 + BE_P); + reg = jit_get_reg(jit_class_gpr); +# if __WORDSIZE == 64 + movi(rn(reg), data.l); + DMTC1(rn(reg), r0); +# else + movi(rn(reg), data.i[0 + BE_P]); + MTC1(rn(reg), r0); + movi(rn(reg), data.i[0 + LE_P]); + MTHC1(rn(reg), r0); +# endif + jit_unget_reg(reg); } else - ldi_f(r0 + BE_P, 
(jit_word_t)i0); + ldi_d(r0, (jit_word_t)i0); } - else - MTC1(_ZERO_REGNO, r0 + BE_P); - if (data.i[1]) { - if (_jitc->no_data) { - movi(rn(reg), data.i[1]); - MTC1(rn(reg), r0 + LE_P); + else { + if (_jitc->no_data) + reg = jit_get_reg(jit_class_gpr); + if (data.i[0]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[0]); + MTC1(rn(reg), r0 + BE_P); + } + else + ldi_f(r0 + BE_P, (jit_word_t)i0); } else - ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4); + MTC1(_ZERO_REGNO, r0 + BE_P); + if (data.i[1]) { + if (_jitc->no_data) { + movi(rn(reg), data.i[1]); + MTC1(rn(reg), r0 + LE_P); + } + else + ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4); + } + else + MTC1(_ZERO_REGNO, r0 + LE_P); + if (_jitc->no_data) + jit_unget_reg(reg); } - else - MTC1(_ZERO_REGNO, r0 + LE_P); - if (_jitc->no_data) - jit_unget_reg(reg); # endif } @@ -1089,13 +1206,26 @@ static void _ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(lt) @@ -1103,13 +1233,26 @@ static void _ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 
1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(le) @@ -1117,13 +1260,26 @@ static void _eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_EQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(eq) @@ -1131,13 +1287,26 @@ static void _ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ge) @@ -1145,13 +1314,26 @@ static void _gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(gt) @@ -1159,13 +1341,26 @@ static void 
_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_EQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ne) @@ -1173,13 +1368,26 @@ static void _unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unlt) @@ -1187,13 +1395,26 @@ static void _unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unle) @@ -1201,13 +1422,26 @@ static void _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, 
r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UEQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(uneq) @@ -1215,13 +1449,26 @@ static void _unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLT_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unge) @@ -1229,13 +1476,26 @@ static void _ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLE_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ungt) @@ -1243,13 +1503,26 @@ static void _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, 
_jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UEQ_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ltgt) @@ -1257,13 +1530,26 @@ static void _ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UN_S(r1, r2); + flush(); + /* cannot optimize delay slot */ + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(ord) @@ -1271,13 +1557,26 @@ static void _unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_S(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UN_S(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } fopi(unord) @@ -1285,10 +1584,25 @@ static jit_word_t _bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + 
CMP_LT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(lt) @@ -1297,10 +1611,25 @@ static jit_word_t _bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(le) @@ -1309,10 +1638,25 @@ static jit_word_t _beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(eq) @@ -1321,10 +1665,25 @@ static jit_word_t _bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + 
op = pending(); + CMP_ULT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ge) @@ -1333,10 +1692,25 @@ static jit_word_t _bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(gt) @@ -1345,10 +1719,25 @@ static jit_word_t _bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ne) @@ -1357,10 +1746,25 @@ static jit_word_t _bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = 
jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unlt) @@ -1369,10 +1773,25 @@ static jit_word_t _bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unle) @@ -1381,10 +1800,25 @@ static jit_word_t _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(uneq) @@ -1393,10 +1827,25 @@ static jit_word_t _bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, 
reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unge) @@ -1405,10 +1854,25 @@ static jit_word_t _bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ungt) @@ -1417,10 +1881,25 @@ static jit_word_t _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ltgt) @@ -1429,10 +1908,25 @@ static jit_word_t _bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 
1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(ord) @@ -1441,10 +1935,25 @@ static jit_word_t _bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_S(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_S(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_S(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } fbopi(unord) @@ -1453,13 +1962,26 @@ static void _ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(lt) @@ -1467,13 +1989,26 @@ static void _ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - 
movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_OLE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(le) @@ -1481,13 +2016,26 @@ static void _eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_EQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(eq) @@ -1495,13 +2043,26 @@ static void _ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ge) @@ -1509,13 +2070,26 @@ static void _gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = 
jit_get_reg(jit_class_fpr); + CMP_ULE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_ULE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(gt) @@ -1523,13 +2097,26 @@ static void _ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_EQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_EQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ne) @@ -1537,13 +2124,26 @@ static void _unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_ULT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unlt) @@ -1551,13 +2151,26 @@ static void _unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_ULE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + 
andi(r0, r0, 1); + } + else { + C_ULE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unle) @@ -1565,13 +2178,26 @@ static void _uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UEQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(uneq) @@ -1579,13 +2205,26 @@ static void _unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LT_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLT_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(unge) @@ -1593,13 +2232,26 @@ static void _ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_LE_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_OLE_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = 
_jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ungt) @@ -1607,13 +2259,26 @@ static void _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UEQ_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UEQ_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ltgt) @@ -1621,13 +2286,26 @@ static void _ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1F(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + addi(r0, r0, 1); + } + else { + C_UN_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1F(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, _jit->pc.w); + } } dopi(ord) @@ -1635,13 +2313,26 @@ static void _unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1T(0); - /* delay slot */ - movi(r0, 1); - movi(r0, 0); - patch_at(w, _jit->pc.w); + jit_int32_t reg; + if (jit_mips6_p()) { + reg = jit_get_reg(jit_class_fpr); + CMP_UN_D(rn(reg), r1, r2); + MFC1(r0, rn(reg)); + jit_unget_reg(reg); + andi(r0, r0, 1); + } + else { + C_UN_D(r1, r2); + /* cannot optimize delay slot */ + flush(); + w = _jit->pc.w; + BC1T(0); + /* delay slot */ + movi(r0, 1); + movi(r0, 0); + flush(); + patch_at(w, 
_jit->pc.w); + } } dopi(unord) @@ -1649,10 +2340,25 @@ static jit_word_t _bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(lt) @@ -1661,10 +2367,25 @@ static jit_word_t _bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(le) @@ -1673,10 +2394,25 @@ static jit_word_t _beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + 
delay(op); + jit_unget_reg(reg); return (w); } dbopi(eq) @@ -1685,10 +2421,25 @@ static jit_word_t _bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ge) @@ -1697,10 +2448,25 @@ static jit_word_t _bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(gt) @@ -1709,10 +2475,25 @@ static jit_word_t _bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_EQ_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_EQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_EQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - 
w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ne) @@ -1721,10 +2502,25 @@ static jit_word_t _bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULT_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unlt) @@ -1733,10 +2529,25 @@ static jit_word_t _bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_ULE_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_ULE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_ULE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unle) @@ -1745,10 +2556,25 @@ static jit_word_t _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UEQ_D(r1, r2); + flush(); 
+ w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(uneq) @@ -1757,10 +2583,25 @@ static jit_word_t _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLT_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LT_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLT_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unge) @@ -1769,10 +2610,25 @@ static jit_word_t _bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_OLE_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_LE_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_OLE_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ungt) @@ -1781,10 +2637,25 @@ static jit_word_t _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UEQ_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UEQ_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = 
pending(); + C_UEQ_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ltgt) @@ -1793,10 +2664,25 @@ static jit_word_t _bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1F(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1F(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(ord) @@ -1805,10 +2691,25 @@ static jit_word_t _bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2) { jit_word_t w; - C_UN_D(r1, r2); - w = _jit->pc.w; - BC1T(((i0 - w) >> 2) - 1); - NOP(1); + jit_int32_t op, reg; + if (jit_mips6_p()) { + reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2); + op = pending(); + CMP_UN_D(rn(reg), r1, r2); + flush(); + w = _jit->pc.w; + BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1); + } + else { + reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); + op = pending(); + C_UN_D(r1, r2); + flush(); + w = _jit->pc.w; + BC1T(((i0 - w) >> 2) - 1); + } + delay(op); + jit_unget_reg(reg); return (w); } dbopi(unord) diff --git a/deps/lightning/lib/jit_mips-sz.c b/deps/lightning/lib/jit_mips-sz.c index 91deb4b4..0a7436de 100644 --- a/deps/lightning/lib/jit_mips-sz.c +++ b/deps/lightning/lib/jit_mips-sz.c @@ -1,423 +1,12 @@ #if __WORDSIZE == 32 -#if NEW_ABI -#define JIT_INSTR_MAX 52 - 0, /* data */ - 0, /* live */ - 0, /* align */ - 0, /* save */ - 0, /* load */ - 0, /* #name */ - 0, /* #note */ - 0, /* label */ - 44, /* prolog */ - 0, /* ellipsis */ - 0, /* va_push */ - 0, /* allocai */ - 0, /* allocar 
*/ - 0, /* arg */ - 0, /* getarg_c */ - 0, /* getarg_uc */ - 0, /* getarg_s */ - 0, /* getarg_us */ - 0, /* getarg_i */ - 0, /* getarg_ui */ - 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ - 0, /* va_end */ - 4, /* addr */ - 12, /* addi */ - 12, /* addcr */ - 20, /* addci */ - 28, /* addxr */ - 28, /* addxi */ - 4, /* subr */ - 12, /* subi */ - 12, /* subcr */ - 20, /* subci */ - 28, /* subxr */ - 28, /* subxi */ - 16, /* rsbi */ - 4, /* mulr */ - 12, /* muli */ - 12, /* qmulr */ - 20, /* qmuli */ - 12, /* qmulr_u */ - 20, /* qmuli_u */ - 8, /* divr */ - 16, /* divi */ - 8, /* divr_u */ - 16, /* divi_u */ - 12, /* qdivr */ - 16, /* qdivi */ - 12, /* qdivr_u */ - 16, /* qdivi_u */ - 8, /* remr */ - 16, /* remi */ - 8, /* remr_u */ - 16, /* remi_u */ - 4, /* andr */ - 12, /* andi */ - 4, /* orr */ - 12, /* ori */ - 4, /* xorr */ - 12, /* xori */ - 4, /* lshr */ - 4, /* lshi */ - 4, /* rshr */ - 4, /* rshi */ - 4, /* rshr_u */ - 4, /* rshi_u */ - 4, /* negr */ - 8, /* comr */ - 4, /* ltr */ - 4, /* lti */ - 4, /* ltr_u */ - 4, /* lti_u */ - 8, /* ler */ - 12, /* lei */ - 8, /* ler_u */ - 12, /* lei_u */ - 12, /* eqr */ - 12, /* eqi */ - 8, /* ger */ - 12, /* gei */ - 8, /* ger_u */ - 12, /* gei_u */ - 4, /* gtr */ - 8, /* gti */ - 4, /* gtr_u */ - 8, /* gti_u */ - 8, /* ner */ - 8, /* nei */ - 4, /* movr */ - 8, /* movi */ - 4, /* movnr */ - 4, /* movzr */ - 8, /* extr_c */ - 4, /* extr_uc */ - 8, /* extr_s */ - 4, /* extr_us */ - 0, /* extr_i */ - 0, /* extr_ui */ - 4, /* htonr_us */ - 4, /* htonr_ui */ - 0, /* htonr_ul */ - 4, /* ldr_c */ - 12, /* ldi_c */ - 4, /* ldr_uc */ - 12, /* ldi_uc */ - 4, /* ldr_s */ - 12, /* ldi_s */ - 4, /* ldr_us */ - 12, /* ldi_us */ - 4, /* ldr_i */ - 12, /* ldi_i */ - 0, /* ldr_ui */ - 0, /* ldi_ui */ - 0, /* ldr_l */ - 0, /* ldi_l */ - 8, /* ldxr_c */ - 4, /* ldxi_c */ - 8, /* ldxr_uc */ - 4, /* ldxi_uc */ - 8, /* ldxr_s */ - 4, /* ldxi_s */ - 8, /* ldxr_us */ - 4, 
/* ldxi_us */ - 8, /* ldxr_i */ - 4, /* ldxi_i */ - 0, /* ldxr_ui */ - 0, /* ldxi_ui */ - 0, /* ldxr_l */ - 0, /* ldxi_l */ - 4, /* str_c */ - 12, /* sti_c */ - 4, /* str_s */ - 12, /* sti_s */ - 4, /* str_i */ - 12, /* sti_i */ - 0, /* str_l */ - 0, /* sti_l */ - 8, /* stxr_c */ - 4, /* stxi_c */ - 8, /* stxr_s */ - 4, /* stxi_s */ - 8, /* stxr_i */ - 4, /* stxi_i */ - 0, /* stxr_l */ - 0, /* stxi_l */ - 12, /* bltr */ - 12, /* blti */ - 12, /* bltr_u */ - 12, /* blti_u */ - 12, /* bler */ - 16, /* blei */ - 12, /* bler_u */ - 16, /* blei_u */ - 8, /* beqr */ - 16, /* beqi */ - 12, /* bger */ - 12, /* bgei */ - 12, /* bger_u */ - 12, /* bgei_u */ - 12, /* bgtr */ - 16, /* bgti */ - 12, /* bgtr_u */ - 16, /* bgti_u */ - 8, /* bner */ - 16, /* bnei */ - 12, /* bmsr */ - 12, /* bmsi */ - 12, /* bmcr */ - 12, /* bmci */ - 28, /* boaddr */ - 28, /* boaddi */ - 16, /* boaddr_u */ - 20, /* boaddi_u */ - 28, /* bxaddr */ - 28, /* bxaddi */ - 16, /* bxaddr_u */ - 20, /* bxaddi_u */ - 28, /* bosubr */ - 28, /* bosubi */ - 16, /* bosubr_u */ - 20, /* bosubi_u */ - 28, /* bxsubr */ - 28, /* bxsubi */ - 16, /* bxsubr_u */ - 20, /* bxsubi_u */ - 0, /* jmpr */ - 8, /* jmpi */ - 12, /* callr */ - 16, /* calli */ - 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ - 0, /* finishr */ - 0, /* finishi */ - 0, /* ret */ - 0, /* retr */ - 0, /* reti */ - 0, /* retval_c */ - 0, /* retval_uc */ - 0, /* retval_s */ - 0, /* retval_us */ - 0, /* retval_i */ - 0, /* retval_ui */ - 0, /* retval_l */ - 44, /* epilog */ - 0, /* arg_f */ - 0, /* getarg_f */ - 0, /* putargr_f */ - 0, /* putargi_f */ - 4, /* addr_f */ - 16, /* addi_f */ - 4, /* subr_f */ - 16, /* subi_f */ - 16, /* rsbi_f */ - 4, /* mulr_f */ - 16, /* muli_f */ - 4, /* divr_f */ - 16, /* divi_f */ - 4, /* negr_f */ - 4, /* absr_f */ - 4, /* sqrtr_f */ - 16, /* ltr_f */ - 28, /* lti_f */ - 16, /* ler_f */ - 28, /* lei_f */ - 16, /* eqr_f */ - 28, /* eqi_f */ - 16, /* ger_f */ - 28, /* gei_f */ - 16, /* gtr_f */ - 28, /* 
gti_f */ - 16, /* ner_f */ - 28, /* nei_f */ - 16, /* unltr_f */ - 28, /* unlti_f */ - 16, /* unler_f */ - 28, /* unlei_f */ - 16, /* uneqr_f */ - 28, /* uneqi_f */ - 16, /* unger_f */ - 28, /* ungei_f */ - 16, /* ungtr_f */ - 28, /* ungti_f */ - 16, /* ltgtr_f */ - 28, /* ltgti_f */ - 16, /* ordr_f */ - 28, /* ordi_f */ - 16, /* unordr_f */ - 28, /* unordi_f */ - 8, /* truncr_f_i */ - 0, /* truncr_f_l */ - 8, /* extr_f */ - 4, /* extr_d_f */ - 4, /* movr_f */ - 12, /* movi_f */ - 4, /* ldr_f */ - 12, /* ldi_f */ - 8, /* ldxr_f */ - 4, /* ldxi_f */ - 4, /* str_f */ - 12, /* sti_f */ - 8, /* stxr_f */ - 4, /* stxi_f */ - 12, /* bltr_f */ - 24, /* blti_f */ - 12, /* bler_f */ - 24, /* blei_f */ - 12, /* beqr_f */ - 24, /* beqi_f */ - 12, /* bger_f */ - 24, /* bgei_f */ - 12, /* bgtr_f */ - 24, /* bgti_f */ - 12, /* bner_f */ - 24, /* bnei_f */ - 12, /* bunltr_f */ - 24, /* bunlti_f */ - 12, /* bunler_f */ - 24, /* bunlei_f */ - 12, /* buneqr_f */ - 24, /* buneqi_f */ - 12, /* bunger_f */ - 24, /* bungei_f */ - 12, /* bungtr_f */ - 24, /* bungti_f */ - 12, /* bltgtr_f */ - 24, /* bltgti_f */ - 12, /* bordr_f */ - 24, /* bordi_f */ - 12, /* bunordr_f */ - 24, /* bunordi_f */ - 0, /* pushargr_f */ - 0, /* pushargi_f */ - 0, /* retr_f */ - 0, /* reti_f */ - 0, /* retval_f */ - 0, /* arg_d */ - 0, /* getarg_d */ - 0, /* putargr_d */ - 0, /* putargi_d */ - 4, /* addr_d */ - 16, /* addi_d */ - 4, /* subr_d */ - 16, /* subi_d */ - 16, /* rsbi_d */ - 4, /* mulr_d */ - 16, /* muli_d */ - 4, /* divr_d */ - 16, /* divi_d */ - 4, /* negr_d */ - 4, /* absr_d */ - 4, /* sqrtr_d */ - 16, /* ltr_d */ - 28, /* lti_d */ - 16, /* ler_d */ - 28, /* lei_d */ - 16, /* eqr_d */ - 28, /* eqi_d */ - 16, /* ger_d */ - 28, /* gei_d */ - 16, /* gtr_d */ - 28, /* gti_d */ - 16, /* ner_d */ - 28, /* nei_d */ - 16, /* unltr_d */ - 28, /* unlti_d */ - 16, /* unler_d */ - 28, /* unlei_d */ - 16, /* uneqr_d */ - 28, /* uneqi_d */ - 16, /* unger_d */ - 28, /* ungei_d */ - 16, /* ungtr_d */ - 28, /* 
ungti_d */ - 16, /* ltgtr_d */ - 28, /* ltgti_d */ - 16, /* ordr_d */ - 28, /* ordi_d */ - 16, /* unordr_d */ - 28, /* unordi_d */ - 8, /* truncr_d_i */ - 0, /* truncr_d_l */ - 8, /* extr_d */ - 4, /* extr_f_d */ - 4, /* movr_d */ - 12, /* movi_d */ - 4, /* ldr_d */ - 12, /* ldi_d */ - 8, /* ldxr_d */ - 4, /* ldxi_d */ - 4, /* str_d */ - 12, /* sti_d */ - 8, /* stxr_d */ - 4, /* stxi_d */ - 12, /* bltr_d */ - 24, /* blti_d */ - 12, /* bler_d */ - 24, /* blei_d */ - 12, /* beqr_d */ - 24, /* beqi_d */ - 12, /* bger_d */ - 24, /* bgei_d */ - 12, /* bgtr_d */ - 24, /* bgti_d */ - 12, /* bner_d */ - 24, /* bnei_d */ - 12, /* bunltr_d */ - 24, /* bunlti_d */ - 12, /* bunler_d */ - 24, /* bunlei_d */ - 12, /* buneqr_d */ - 24, /* buneqi_d */ - 12, /* bunger_d */ - 24, /* bungei_d */ - 12, /* bungtr_d */ - 24, /* bungti_d */ - 12, /* bltgtr_d */ - 24, /* bltgti_d */ - 12, /* bordr_d */ - 24, /* bordi_d */ - 12, /* bunordr_d */ - 24, /* bunordi_d */ - 0, /* pushargr_d */ - 0, /* pushargi_d */ - 0, /* retr_d */ - 0, /* reti_d */ - 0, /* retval_d */ - 0, /* movr_w_f */ - 0, /* movr_ww_d */ - 0, /* movr_w_d */ - 0, /* movr_f_w */ - 0, /* movi_f_w */ - 0, /* movr_d_ww */ - 0, /* movi_d_ww */ - 4, /* movr_d_w */ - 12, /* movi_d_w */ - 20, /* bswapr_us */ - 52, /* bswapr_ui */ - 0, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ -#endif /* NEW_ABI */ -#endif /* __WORDSIZE */ - -#if __WORDSIZE == 32 -#if !NEW_ABI #define JIT_INSTR_MAX 116 0, /* data */ 0, /* live */ - 0, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -426,7 +15,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -434,8 +26,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* 
putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 20, /* va_arg_d */ @@ -472,7 +76,7 @@ 8, /* remr_u */ 16, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 8, /* andi */ 4, /* orr */ 12, /* ori */ 4, /* xorr */ @@ -490,15 +94,15 @@ 4, /* ltr_u */ 4, /* lti_u */ 8, /* ler */ - 12, /* lei */ + 4, /* lei */ 8, /* ler_u */ - 12, /* lei_u */ - 12, /* eqr */ - 12, /* eqi */ + 4, /* lei_u */ + 8, /* eqr */ + 8, /* eqi */ 8, /* ger */ - 12, /* gei */ + 8, /* gei */ 8, /* ger_u */ - 12, /* gei_u */ + 8, /* gei_u */ 4, /* gtr */ 8, /* gti */ 4, /* gtr_u */ @@ -509,14 +113,19 @@ 8, /* movi */ 4, /* movnr */ 4, /* movzr */ - 8, /* extr_c */ + 36, /* casr */ + 44, /* casi */ + 4, /* extr_c */ 4, /* extr_uc */ - 8, /* extr_s */ + 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ - 20, /* htonr_us */ - 52, /* htonr_ui */ + 8, /* bswapr_us */ + 8, /* bswapr_ui */ + 0, /* bswapr_ul */ + 4, /* htonr_us */ + 4, /* htonr_ui */ 0, /* htonr_ul */ 4, /* ldr_c */ 12, /* ldi_c */ @@ -604,16 +213,40 @@ 20, /* bxsubi_u */ 8, /* jmpr */ 8, /* jmpi */ - 12, /* callr */ + 8, /* callr */ 16, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* 
retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -813,30 +446,32 @@ 8, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ - 52, /* bswapr_ui */ - 0, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ -#endif /* NEW_ABI */ + 8, /* clo */ + 8, /* clz */ + 76, /* cto */ + 76, /* ctz */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 116 +#define JIT_INSTR_MAX 76 0, /* data */ 0, /* live */ - 4, /* align */ + 24, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 44, /* prolog */ + 76, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -844,11 +479,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 0, /* va_start */ - 0, /* va_arg */ - 0, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 28, /* addi */ @@ -862,7 +509,7 @@ 36, /* subci */ 28, /* subxr */ 28, /* subxi */ - 32, /* rsbi */ + 36, /* rsbi */ 8, /* mulr */ 32, /* muli */ 12, /* qmulr */ @@ -882,7 +529,7 @@ 8, /* remr_u */ 32, /* remi_u */ 4, /* andr */ - 28, /* andi */ + 8, /* andi */ 4, /* orr */ 28, /* ori */ 4, /* xorr */ @@ -900,15 +547,15 @@ 4, /* ltr_u */ 4, /* lti_u */ 8, /* ler */ - 12, /* lei */ + 4, /* lei */ 8, /* ler_u */ - 12, /* lei_u */ - 12, /* eqr */ - 12, /* eqi */ + 4, /* lei_u */ + 8, /* eqr */ + 8, /* eqi */ 8, /* ger */ - 12, /* gei */ + 8, /* gei */ 8, /* ger_u */ - 12, /* gei_u */ + 8, /* 
gei_u */ 4, /* gtr */ 8, /* gti */ 4, /* gtr_u */ @@ -919,59 +566,64 @@ 28, /* movi */ 4, /* movnr */ 4, /* movzr */ - 8, /* extr_c */ + 36, /* casr */ + 56, /* casi */ + 4, /* extr_c */ 4, /* extr_uc */ - 8, /* extr_s */ + 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ - 8, /* extr_ui */ + 4, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ - 12, /* ldi_c */ + 24, /* ldi_c */ 4, /* ldr_uc */ - 12, /* ldi_uc */ + 24, /* ldi_uc */ 4, /* ldr_s */ - 12, /* ldi_s */ + 24, /* ldi_s */ 4, /* ldr_us */ - 12, /* ldi_us */ + 24, /* ldi_us */ 4, /* ldr_i */ - 12, /* ldi_i */ + 24, /* ldi_i */ 4, /* ldr_ui */ - 12, /* ldi_ui */ + 24, /* ldi_ui */ 4, /* ldr_l */ - 12, /* ldi_l */ + 24, /* ldi_l */ 8, /* ldxr_c */ - 4, /* ldxi_c */ + 16, /* ldxi_c */ 8, /* ldxr_uc */ - 4, /* ldxi_uc */ + 16, /* ldxi_uc */ 8, /* ldxr_s */ - 4, /* ldxi_s */ + 16, /* ldxi_s */ 8, /* ldxr_us */ - 4, /* ldxi_us */ + 16, /* ldxi_us */ 8, /* ldxr_i */ - 4, /* ldxi_i */ + 16, /* ldxi_i */ 8, /* ldxr_ui */ - 4, /* ldxi_ui */ + 16, /* ldxi_ui */ 8, /* ldxr_l */ - 4, /* ldxi_l */ + 16, /* ldxi_l */ 4, /* str_c */ - 12, /* sti_c */ + 24, /* sti_c */ 4, /* str_s */ - 12, /* sti_s */ + 24, /* sti_s */ 4, /* str_i */ - 12, /* sti_i */ + 24, /* sti_i */ 4, /* str_l */ - 12, /* sti_l */ + 24, /* sti_l */ 8, /* stxr_c */ - 4, /* stxi_c */ + 16, /* stxi_c */ 8, /* stxr_s */ - 4, /* stxi_s */ + 16, /* stxi_s */ 8, /* stxr_i */ - 4, /* stxi_i */ + 16, /* stxi_i */ 8, /* stxr_l */ - 4, /* stxi_l */ + 16, /* stxi_l */ 12, /* bltr */ 12, /* blti */ 12, /* bltr_u */ @@ -991,7 +643,7 @@ 12, /* bgtr_u */ 16, /* bgti_u */ 8, /* bner */ - 32, /* bnei */ + 28, /* bnei */ 12, /* bmsr */ 12, /* bmsi */ 12, /* bmcr */ @@ -1012,18 +664,42 @@ 28, /* bxsubi */ 16, /* bxsubr_u */ 20, /* bxsubi_u */ - 0, /* jmpr */ + 8, /* jmpr */ 8, /* jmpi */ - 12, /* callr */ + 8, /* callr */ 32, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 
0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1031,93 +707,93 @@ 0, /* retval_i */ 0, /* retval_ui */ 0, /* retval_l */ - 44, /* epilog */ + 76, /* epilog */ 0, /* arg_f */ 0, /* getarg_f */ 0, /* putargr_f */ 0, /* putargi_f */ 4, /* addr_f */ - 16, /* addi_f */ + 28, /* addi_f */ 4, /* subr_f */ - 16, /* subi_f */ - 16, /* rsbi_f */ + 28, /* subi_f */ + 28, /* rsbi_f */ 4, /* mulr_f */ - 16, /* muli_f */ + 28, /* muli_f */ 4, /* divr_f */ - 16, /* divi_f */ + 28, /* divi_f */ 4, /* negr_f */ 4, /* absr_f */ 4, /* sqrtr_f */ 16, /* ltr_f */ - 28, /* lti_f */ + 40, /* lti_f */ 16, /* ler_f */ - 28, /* lei_f */ + 40, /* lei_f */ 16, /* eqr_f */ - 28, /* eqi_f */ + 40, /* eqi_f */ 16, /* ger_f */ - 28, /* gei_f */ + 40, /* gei_f */ 16, /* gtr_f */ - 28, /* gti_f */ + 40, /* gti_f */ 16, /* ner_f */ - 28, /* nei_f */ + 40, /* nei_f */ 16, /* unltr_f */ - 28, /* unlti_f */ + 40, /* unlti_f */ 16, /* unler_f */ - 28, /* unlei_f */ + 40, /* unlei_f */ 16, /* uneqr_f */ - 28, /* uneqi_f */ + 40, /* uneqi_f */ 16, /* unger_f */ - 28, /* ungei_f */ + 40, /* ungei_f */ 16, /* ungtr_f */ - 28, /* ungti_f */ + 40, /* ungti_f */ 16, /* ltgtr_f */ - 28, /* ltgti_f */ + 40, /* ltgti_f */ 16, /* ordr_f */ - 28, /* ordi_f */ + 40, /* ordi_f */ 16, /* unordr_f */ - 28, /* unordi_f */ + 40, /* unordi_f */ 8, /* 
truncr_f_i */ 8, /* truncr_f_l */ 8, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ - 12, /* movi_f */ + 24, /* movi_f */ 4, /* ldr_f */ - 12, /* ldi_f */ + 24, /* ldi_f */ 8, /* ldxr_f */ - 4, /* ldxi_f */ + 16, /* ldxi_f */ 4, /* str_f */ - 12, /* sti_f */ + 24, /* sti_f */ 8, /* stxr_f */ - 4, /* stxi_f */ + 16, /* stxi_f */ 12, /* bltr_f */ - 24, /* blti_f */ + 36, /* blti_f */ 12, /* bler_f */ - 24, /* blei_f */ + 36, /* blei_f */ 12, /* beqr_f */ - 24, /* beqi_f */ + 36, /* beqi_f */ 12, /* bger_f */ - 24, /* bgei_f */ + 36, /* bgei_f */ 12, /* bgtr_f */ - 24, /* bgti_f */ + 36, /* bgti_f */ 12, /* bner_f */ - 24, /* bnei_f */ + 36, /* bnei_f */ 12, /* bunltr_f */ - 24, /* bunlti_f */ + 36, /* bunlti_f */ 12, /* bunler_f */ - 24, /* bunlei_f */ + 36, /* bunlei_f */ 12, /* buneqr_f */ - 24, /* buneqi_f */ + 36, /* buneqi_f */ 12, /* bunger_f */ - 24, /* bungei_f */ + 36, /* bungei_f */ 12, /* bungtr_f */ - 24, /* bungti_f */ + 36, /* bungti_f */ 12, /* bltgtr_f */ - 24, /* bltgti_f */ + 36, /* bltgti_f */ 12, /* bordr_f */ - 24, /* bordi_f */ + 36, /* bordi_f */ 12, /* bunordr_f */ - 24, /* bunordi_f */ + 36, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -1128,87 +804,87 @@ 0, /* putargr_d */ 0, /* putargi_d */ 4, /* addr_d */ - 16, /* addi_d */ + 28, /* addi_d */ 4, /* subr_d */ - 16, /* subi_d */ - 16, /* rsbi_d */ + 28, /* subi_d */ + 28, /* rsbi_d */ 4, /* mulr_d */ - 16, /* muli_d */ + 28, /* muli_d */ 4, /* divr_d */ - 16, /* divi_d */ + 28, /* divi_d */ 4, /* negr_d */ 4, /* absr_d */ 4, /* sqrtr_d */ 16, /* ltr_d */ - 28, /* lti_d */ + 44, /* lti_d */ 16, /* ler_d */ - 28, /* lei_d */ + 44, /* lei_d */ 16, /* eqr_d */ - 28, /* eqi_d */ + 44, /* eqi_d */ 16, /* ger_d */ - 28, /* gei_d */ + 44, /* gei_d */ 16, /* gtr_d */ - 28, /* gti_d */ + 44, /* gti_d */ 16, /* ner_d */ - 28, /* nei_d */ + 44, /* nei_d */ 16, /* unltr_d */ - 28, /* unlti_d */ + 44, /* unlti_d */ 16, /* unler_d */ - 28, /* unlei_d */ + 44, /* unlei_d */ 16, 
/* uneqr_d */ - 28, /* uneqi_d */ + 44, /* uneqi_d */ 16, /* unger_d */ - 28, /* ungei_d */ + 44, /* ungei_d */ 16, /* ungtr_d */ - 28, /* ungti_d */ + 44, /* ungti_d */ 16, /* ltgtr_d */ - 28, /* ltgti_d */ + 44, /* ltgti_d */ 16, /* ordr_d */ - 28, /* ordi_d */ + 44, /* ordi_d */ 16, /* unordr_d */ - 28, /* unordi_d */ + 44, /* unordi_d */ 8, /* truncr_d_i */ 8, /* truncr_d_l */ 8, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 12, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ - 12, /* ldi_d */ + 24, /* ldi_d */ 8, /* ldxr_d */ - 4, /* ldxi_d */ + 16, /* ldxi_d */ 4, /* str_d */ - 12, /* sti_d */ + 24, /* sti_d */ 8, /* stxr_d */ - 4, /* stxi_d */ + 16, /* stxi_d */ 12, /* bltr_d */ - 24, /* blti_d */ + 36, /* blti_d */ 12, /* bler_d */ - 24, /* blei_d */ + 36, /* blei_d */ 12, /* beqr_d */ - 24, /* beqi_d */ + 36, /* beqi_d */ 12, /* bger_d */ - 24, /* bgei_d */ + 36, /* bgei_d */ 12, /* bgtr_d */ - 24, /* bgti_d */ + 36, /* bgti_d */ 12, /* bner_d */ - 24, /* bnei_d */ + 40, /* bnei_d */ 12, /* bunltr_d */ - 24, /* bunlti_d */ + 40, /* bunlti_d */ 12, /* bunler_d */ - 24, /* bunlei_d */ + 40, /* bunlei_d */ 12, /* buneqr_d */ - 24, /* buneqi_d */ + 40, /* buneqi_d */ 12, /* bunger_d */ - 24, /* bungei_d */ + 40, /* bungei_d */ 12, /* bungtr_d */ - 24, /* bungti_d */ + 40, /* bungti_d */ 12, /* bltgtr_d */ - 24, /* bltgti_d */ + 36, /* bltgti_d */ 12, /* bordr_d */ - 24, /* bordi_d */ + 36, /* bordi_d */ 12, /* bunordr_d */ - 24, /* bunordi_d */ + 40, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -1222,10 +898,9 @@ 0, /* movr_d_ww */ 0, /* movi_d_ww */ 4, /* movr_d_w */ - 12, /* movi_d_w */ - 20, /* bswapr_us */ - 52, /* bswapr_ui */ - 116, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ + 24, /* movi_d_w */ + 4, /* clo */ + 4, /* clz */ + 72, /* cto */ + 72, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_mips.c b/deps/lightning/lib/jit_mips.c index d98d94e8..6d564238 100644 --- 
a/deps/lightning/lib/jit_mips.c +++ b/deps/lightning/lib/jit_mips.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -21,6 +21,16 @@ # include #endif +#if NEW_ABI +/* callee save + variadic arguments + * align16(ra+fp+s[0-7]++f20+f22+f24+f26+f28+f30) + align16(a[0-7]) */ +# define stack_framesize (128 + 64) +#else +/* callee save + * align16(ra+fp+s[0-7]+f16+f18+f20+f22+f24+f26+f28+f30) */ +# define stack_framesize 128 +#endif + #if NEW_ABI # define NUM_WORD_ARGS 8 # define STACK_SLOT 8 @@ -54,12 +64,14 @@ typedef struct jit_pointer_t jit_va_list_t; /* * Prototypes */ -#define jit_make_arg(node) _jit_make_arg(_jit,node) -static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*); +#define jit_make_arg(node,code) _jit_make_arg(_jit,node,code) +static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t); #define jit_make_arg_f(node) _jit_make_arg_f(_jit,node) static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*); #define jit_make_arg_d(node) _jit_make_arg_d(_jit,node) static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*); +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); @@ -67,11 +79,13 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" +# include "jit_fallback.c" #undef PROTO /* * Initialization */ +jit_cpu_t jit_cpu; jit_register_t _rvs[] = { { rc(gpr) | 0x01, "at" }, { rc(gpr) | 0x02, "v0" }, @@ -145,12 +159,49 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { + _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7 +}; + +static jit_int32_t fregs[] = { +#if !NEW_ABI + _F16, _F18, +#endif + _F20, _F22, _F24, _F26, _F28, _F30 +}; + /* * Implementation 
*/ void jit_get_cpu(void) { +#if defined(__linux__) + FILE *fp; + char *ptr; + char buf[128]; + + if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) { + while (fgets(buf, sizeof(buf), fp)) { + if (strncmp(buf, "isa : ", 8) == 0) { + if ((ptr = strstr(buf + 9, "mips64r"))) + jit_cpu.release = strtoul(ptr + 7, NULL, 10); + break; + } + } + fclose(fp); + } +#endif +#if __mips_isa_rev + if (!jit_cpu.release) + jit_cpu.release = __mips_isa_rev; +#elif defined _MIPS_ARCH + if (!jit_cpu.release) + jit_cpu.release = strtoul(&_MIPS_ARCH[4], NULL, 10); +#elif defined(__mips) && __mips < 6 + if (!jit_cpu.release) + jit_cpu.release = __mips; +#endif } void @@ -211,6 +262,7 @@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + jit_check_frame(); switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -259,20 +311,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -332,18 +382,18 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); #if NEW_ABI - return (jit_arg_reg_p(u->u.w)); + return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8)); #else return (u->u.w < 8); #endif } static jit_node_t * -_jit_make_arg(jit_state_t *_jit, jit_node_t *node) +_jit_make_arg(jit_state_t *_jit, 
jit_node_t *node, jit_code_t code) { jit_int32_t offset; #if NEW_ABI @@ -355,13 +405,13 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node) } #else offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT; - _jitc->function->self.argi = 1; + ++_jitc->function->self.argi; if (offset >= 4) offset = _jitc->function->self.size; _jitc->function->self.size += STACK_SLOT; #endif if (node == (jit_node_t *)0) - node = jit_new_node(jit_code_arg); + node = jit_new_node(code); else link_node(node); node->u.w = offset; @@ -469,7 +519,6 @@ _jit_ellipsis(jit_state_t *_jit) else { assert(!(_jitc->function->self.call & jit_call_varargs)); #if NEW_ABI - /* If varargs start in a register, allocate extra 64 bytes. */ if (jit_arg_reg_p(_jitc->function->self.argi)) rewind_prolog(); /* Do not set during possible rewind. */ @@ -482,6 +531,7 @@ _jit_ellipsis(jit_state_t *_jit) _jitc->function->vagp = _jitc->function->self.argi; } jit_inc_synth(ellipsis); + jit_check_frame(); if (_jitc->prepare) jit_link_prepare(); else @@ -498,10 +548,14 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { assert(_jitc->function); - return (jit_make_arg((jit_node_t*)0)); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif + return (jit_make_arg((jit_node_t*)0, code)); } jit_node_t * @@ -521,55 +575,67 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _A0 - v->u.w); - else - jit_ldxi_c(u, _FP, v->u.w + C_DISP); + else { + jit_node_t *node = jit_ldxi_c(u, _FP, v->u.w + C_DISP); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, 
jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _A0 - v->u.w); - else - jit_ldxi_uc(u, _FP, v->u.w + C_DISP); + else { + jit_node_t *node = jit_ldxi_uc(u, _FP, v->u.w + C_DISP); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _A0 - v->u.w); - else - jit_ldxi_s(u, _FP, v->u.w + S_DISP); + else { + jit_node_t *node = jit_ldxi_s(u, _FP, v->u.w + S_DISP); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _A0 - v->u.w); - else - jit_ldxi_us(u, _FP, v->u.w + S_DISP); + else { + jit_node_t *node = jit_ldxi_us(u, _FP, v->u.w + S_DISP); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { #if __WORDSIZE == 64 @@ -578,8 +644,11 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr(u, _A0 - v->u.w); #endif } - else - jit_ldxi_i(u, _FP, v->u.w + I_DISP); + else { + jit_node_t *node = jit_ldxi_i(u, _FP, v->u.w + I_DISP); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } @@ -587,52 +656,64 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); 
jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _A0 - v->u.w); - else - jit_ldxi_ui(u, _FP, v->u.w + I_DISP); + else { + jit_node_t *node = jit_ldxi_ui(u, _FP, v->u.w + I_DISP); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _A0 - v->u.w); - else - jit_ldxi_l(u, _FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_l(u, _FP, v->u.w); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - jit_inc_synth_wp(putargr, u, v); - assert(v->code == jit_code_arg); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_A0 - v->u.w, u); - else - jit_stxi(v->u.w + WORD_ADJUST, _FP, u); + else { + jit_node_t *node = jit_stxi(v->u.w + WORD_ADJUST, _FP, u); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_A0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w + WORD_ADJUST, _FP, regno); + node = jit_stxi(v->u.w + WORD_ADJUST, _FP, regno); + jit_link_alist(node); + jit_check_frame(); jit_unget_reg(regno); } jit_dec_synth(); @@ -647,15 +728,18 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movr_f(u, _F12 - v->u.w); else if 
(jit_arg_reg_p(v->u.w - 8)) - jit_movr_w_f(u, _A0 - v->u.w - 8); + jit_movr_w_f(u, _A0 - (v->u.w - 8)); #else if (v->u.w < 4) jit_movr_w_f(u, _A0 - v->u.w); else if (v->u.w < 8) jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1)); #endif - else - jit_ldxi_f(u, _FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_f(u, _FP, v->u.w); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } @@ -668,15 +752,18 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movr_f(_F12 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) - jit_movr_f_w(_A0 - v->u.w - 8, u); + jit_movr_f_w(_A0 - (v->u.w - 8), u); #else if (v->u.w < 4) jit_movr_f_w(_A0 - v->u.w, u); else if (v->u.w < 8) jit_movr_f(_F12 - ((v->u.w - 4) >> 1), u); #endif - else - jit_stxi_f(v->u.w, _FP, u); + else { + jit_node_t *node = jit_stxi_f(v->u.w, _FP, u); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } @@ -689,12 +776,8 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) #if NEW_ABI if (jit_arg_reg_p(v->u.w)) jit_movi_f(_F12 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - regno = jit_get_reg(jit_class_fpr); - jit_movi_f(regno, u); - jit_movr_f_w(_A0 - v->u.w - 8, u); - jit_unget_reg(regno); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_f_w(_A0 - (v->u.w - 8), u); #else if (v->u.w < 4) { regno = jit_get_reg(jit_class_fpr); @@ -706,9 +789,12 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) jit_movi_f(_F12 - ((v->u.w - 4) >> 1), u); #endif else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, _FP, regno); + node = jit_stxi_f(v->u.w, _FP, regno); + jit_link_alist(node); + jit_check_frame(); jit_unget_reg(regno); } jit_dec_synth(); @@ -723,15 +809,18 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movr_d(u, _F12 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) - jit_movr_d_w(_A0 - v->u.w - 8, u); + 
jit_movr_d_w(_A0 - (v->u.w - 8), u); #else if (v->u.w < 4) jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1)); else if (v->u.w < 8) jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1)); #endif - else - jit_ldxi_d(u, _FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_d(u, _FP, v->u.w); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } @@ -744,15 +833,18 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) if (jit_arg_reg_p(v->u.w)) jit_movr_d(_F12 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) - jit_movr_d_w(_A0 - v->u.w - 8, u); + jit_movr_d_w(_A0 - (v->u.w - 8), u); #else if (v->u.w < 4) jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), u); else if (v->u.w < 8) jit_movr_d(_F12 - ((v->u.w - 4) >> 1), u); #endif - else - jit_stxi_d(v->u.w, _FP, u); + else { + jit_node_t *node = jit_stxi_d(v->u.w, _FP, u); + jit_link_alist(node); + jit_check_frame(); + } jit_dec_synth(); } @@ -765,12 +857,8 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) #if NEW_ABI if (jit_arg_reg_p(v->u.w)) jit_movi_d(_F12 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - regno = jit_get_reg(jit_class_fpr); - jit_movi_d(regno, u); - jit_movr_d_w(_A0 - v->u.w - 8, u); - jit_unget_reg(regno); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_d_w(_A0 - (v->u.w - 8), u); #else if (v->u.w < 4) { regno = jit_get_reg(jit_class_fpr); @@ -782,18 +870,21 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) jit_movi_d(_F12 - ((v->u.w - 4) >> 1), u); #endif else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, _FP, regno); + node = jit_stxi_d(v->u.w, _FP, regno); + jit_link_alist(node); + jit_check_frame(); jit_unget_reg(regno); } jit_dec_synth(); } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if NEW_ABI assert(_jitc->function); 
@@ -802,6 +893,7 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argi; } else { + jit_check_frame(); jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, u); _jitc->function->call.size += STACK_SLOT; } @@ -809,25 +901,27 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) jit_word_t offset; assert(_jitc->function); offset = _jitc->function->call.size >> STACK_SHIFT; - _jitc->function->call.argi = 1; + ++_jitc->function->call.argi; if (jit_arg_reg_p(offset)) jit_movr(_A0 - offset, u); - else + else { + jit_check_frame(); jit_stxi(_jitc->function->call.size, JIT_SP, u); + } _jitc->function->call.size += STACK_SLOT; #endif jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; #if !NEW_ABI jit_word_t offset; #endif assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if NEW_ABI if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -835,6 +929,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) ++_jitc->function->call.argi; } else { + jit_check_frame(); regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, regno); @@ -847,6 +942,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) if (jit_arg_reg_p(offset)) jit_movi(_A0 - offset, u); else { + jit_check_frame(); regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); jit_stxi(_jitc->function->call.size, JIT_SP, regno); @@ -875,6 +971,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argi; } else { + jit_check_frame(); jit_stxi_f(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += STACK_SLOT; } @@ -889,8 +986,10 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argi; jit_movr_f_w(_A0 - offset, u); } - else + else { + jit_check_frame(); jit_stxi_f(_jitc->function->call.size, JIT_SP, u); + } 
_jitc->function->call.size += STACK_SLOT; #endif jit_dec_synth(); @@ -915,6 +1014,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) ++_jitc->function->call.argi; } else { + jit_check_frame(); regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); @@ -933,6 +1033,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_movi_f_w(_A0 - offset, u); } else { + jit_check_frame(); regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); @@ -962,6 +1063,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argi; } else { + jit_check_frame(); jit_stxi_d(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += STACK_SLOT; } @@ -982,8 +1084,10 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) ++_jitc->function->call.argf; } } - else + else { + jit_check_frame(); jit_stxi_d(_jitc->function->call.size, JIT_SP, u); + } _jitc->function->call.size += sizeof(jit_float64_t); #endif jit_dec_synth(); @@ -1009,6 +1113,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) ++_jitc->function->call.argi; } else { + jit_check_frame(); regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); @@ -1033,6 +1138,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) } } else { + jit_check_frame(); regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); @@ -1070,6 +1176,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) { jit_node_t *call; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishr, r0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -1090,13 +1197,12 @@ jit_node_t * _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { jit_node_t *call; - jit_node_t *node; assert(_jitc->function); + jit_check_frame(); 
jit_inc_synth_w(finishi, (jit_word_t)i0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; - node = jit_movi(_T9, (jit_word_t)i0); - call = jit_callr(_T9); + call = jit_calli(i0); call->v.w = _jitc->function->call.argi; #if NEW_ABI call->w.w = call->v.w; @@ -1107,7 +1213,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) _jitc->function->call.size = 0; _jitc->prepare = 0; jit_dec_synth(); - return (node); + return (call); } void @@ -1182,9 +1288,11 @@ _emit_code(jit_state_t *_jit) jit_word_t word; jit_int32_t value; jit_int32_t offset; + struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1296,18 +1404,30 @@ _emit_code(jit_state_t *_jit) prevw = _jit->pc.w; #endif value = jit_classify(node->code); +#if GET_JIT_SIZE + flush(); +#endif jit_regarg_set(node, value); switch (node->code) { case jit_code_align: /* Must align to a power of two */ assert(!(node->u.w & (node->u.w - 1))); + flush(); if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); + flush(); + break; + case jit_code_skip: + flush(); + nop((node->u.w + 3) & ~3); + flush(); break; case jit_code_note: case jit_code_name: + flush(); node->u.w = _jit->pc.w; break; case jit_code_label: + flush(); /* remember label is defined */ node->flag |= jit_flag_patch; node->u.w = _jit->pc.w; @@ -1461,6 +1581,10 @@ _emit_code(jit_state_t *_jit) break; case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); @@ -1688,6 +1812,7 @@ _emit_code(jit_state_t *_jit) case_brr(bunord, _d); case_brf(bunord, _d, 64); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); break; case jit_code_jmpi: @@ -1696,16 +1821,24 @@ _emit_code(jit_state_t *_jit) assert(temp->code == jit_code_label || temp->code == jit_code_epilog); if (temp->flag & jit_flag_patch) - jmpi(temp->u.w); + jmpi(temp->u.w, 
0); else { - word = jmpi(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (jit_mips2_p() && can_relative_jump_p(word)) + word = jmpi(_jit->pc.w, 1); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } - else - jmpi(node->u.w); + else { + jit_check_frame(); + jmpi(node->u.w, 0); + } break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: @@ -1713,23 +1846,37 @@ _emit_code(jit_state_t *_jit) temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = calli_p(temp->u.w); - if (!(temp->flag & jit_flag_patch)) + if (temp->flag & jit_flag_patch) + calli(temp->u.w, 0); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (jit_mips2_p() && can_relative_jump_p(word)) + word = calli(_jit->pc.w, 1); + else + word = calli_p(_jit->pc.w); patch(word, node); + } + } + else { + jit_check_frame(); + calli(node->u.w, 0); } - else - calli(node->u.w); break; case jit_code_prolog: + flush(); _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif undo.patch_offset = _jitc->patches.offset; restart_function: _jitc->again = 0; + compute_framesize(); + patch_alist(0); prolog(node); break; case jit_code_epilog: @@ -1744,13 +1891,29 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + /* this will be recomputed but undo anyway to have it + * better self documented.*/ + undo.func.need_stack = _jitc->function->need_stack; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif _jitc->patches.offset = undo.patch_offset; + patch_alist(1); goto restart_function; } /* remember label is defined */ + flush(); node->flag |= jit_flag_patch; node->u.w = _jit->pc.w; epilog(node); @@ -1798,14 +1961,26 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1815,10 +1990,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case 
jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1848,6 +2039,9 @@ _emit_code(jit_state_t *_jit) break; } } +#if GET_JIT_SIZE + flush(); +#endif jit_regarg_clr(node, value); assert(_jitc->regarg == 0 || (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry))); @@ -1855,6 +2049,7 @@ _emit_code(jit_state_t *_jit) /* update register live state */ jit_reglive(node); } + flush(); #undef case_brf #undef case_brw #undef case_brr @@ -1881,6 +2076,7 @@ _emit_code(jit_state_t *_jit) # include "jit_rewind.c" # include "jit_mips-cpu.c" # include "jit_mips-fpu.c" +# include "jit_fallback.c" #undef CODE void @@ -1920,6 +2116,29 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) stxi_d(i0, rn(r0), rn(r1)); } +static void +_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + _jitc->framesize = STACK_SLOT << 1; /* ra+fp */ 
+ for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += STACK_SLOT; + + for (reg = 0; reg < jit_size(fregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) + _jitc->framesize += sizeof(jit_float64_t); + +#if NEW_ABI + /* Space to store variadic arguments */ + if (_jitc->function->self.call & jit_call_varargs) + _jitc->framesize += (NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT; +#endif + + /* Make sure functions called have a 16 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 15) & -16; +} + static void _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) { diff --git a/deps/lightning/lib/jit_names.c b/deps/lightning/lib/jit_names.c index b663b672..e5985a32 100644 --- a/deps/lightning/lib/jit_names.c +++ b/deps/lightning/lib/jit_names.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2022 Free Software Foundation, Inc. + * Copyright (C) 2014-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -21,17 +21,27 @@ static char *code_name[] = { "data", "live", "align", "save", "load", + "skip", "#name", "#note", "label", "prolog", "ellipsis", "va_push", "allocai", "allocar", - "arg", + "arg_c", + "arg_s", + "arg_i", + "arg_l", "getarg_c", "getarg_uc", "getarg_s", "getarg_us", "getarg_i", "getarg_ui", "getarg_l", - "putargr", "putargi", + "putargr_c", "putargi_c", + "putargr_uc", "putargi_uc", + "putargr_s", "putargi_s", + "putargr_us", "putargi_us", + "putargr_i", "putargi_i", + "putargr_ui", "putargi_ui", + "putargr_l", "putargi_l", "va_start", "va_arg", "va_arg_d", "va_end", @@ -70,9 +80,12 @@ static char *code_name[] = { "ner", "nei", "movr", "movi", "movnr", "movzr", + "casr", "casi", "extr_c", "extr_uc", "extr_s", "extr_us", "extr_i", "extr_ui", + "bswapr_us", + "bswapr_ui", "bswapr_ul", "htonr_us", "htonr_ui", "htonr_ul", "ldr_c", "ldi_c", @@ -120,10 +133,22 @@ static char *code_name[] = { "jmpr", "jmpi", "callr", "calli", "prepare", - "pushargr", "pushargi", + "pushargr_c", "pushargi_c", + "pushargr_uc", "pushargi_uc", + "pushargr_s", "pushargi_s", + "pushargr_us", "pushargi_us", + "pushargr_i", "pushargi_i", + "pushargr_ui", "pushargi_ui", + "pushargr_l", "pushargi_l", "finishr", "finishi", "ret", - "retr", "reti", + "retr_c", "reti_c", + "retr_uc", "reti_uc", + "retr_s", "reti_s", + "retr_us", "reti_us", + "retr_i", "reti_i", + "retr_ui", "reti_ui", + "retr_l", "reti_l", "retval_c", "retval_uc", "retval_s", "retval_us", "retval_i", "retval_ui", @@ -228,7 +253,6 @@ static char *code_name[] = { "movr_f_w", "movi_f_w", "movr_d_ww", "movi_d_ww", "movr_d_w", "movi_d_w", - "bswapr_us", - "bswapr_ui", "bswapr_ul", - "casr", "casi", + "clo", "clz", + "cto", "ctz", }; diff --git a/deps/lightning/lib/jit_note.c b/deps/lightning/lib/jit_note.c index f1c149fc..b0556192 100644 --- a/deps/lightning/lib/jit_note.c +++ b/deps/lightning/lib/jit_note.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. 
+ * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_ppc-cpu.c b/deps/lightning/lib/jit_ppc-cpu.c index f205db07..67874c60 100644 --- a/deps/lightning/lib/jit_ppc-cpu.c +++ b/deps/lightning/lib/jit_ppc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -202,8 +202,21 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int); # define XCMPLI(cr,l,a,u) FCI(10,cr,l,a,u) # define CMPLDI(a,s) XCMPLI(0,1,a,s) # define CMPLWI(a,s) XCMPLI(0,0,a,s) +# if __WORDSIZE == 32 +# define CMPX(a,b) CMPW(a,b) +# define CMPXI(a,s) CMPWI(a,s) +# define CMPLX(a,b) CMPLW(a,b) +# define CMPLXI(a,s) CMPLWI(a,s) +# else +# define CMPX(a,b) CMPD(a,b) +# define CMPXI(a,s) CMPDI(a,s) +# define CMPLX(a,b) CMPLD(a,b) +# define CMPLXI(a,s) CMPLDI(a,s) +# endif # define CNTLZW(a,s) FX(31,s,a,0,26) # define CNTLZW_(a,s) FX_(31,s,a,0,26) +# define CNTLZD(a,s) FX(31,s,a,0,58) +# define CNTLZD_(a,s) FX_(31,s,a,0,58) # define CRAND(d,a,b) FX(19,d,a,b,257) # define CRANDC(d,a,b) FX(19,d,a,b,129) # define CREQV(d,a,b) FX(19,d,a,b,289) @@ -520,6 +533,19 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define negr(r0,r1) NEG(r0,r1) # define comr(r0,r1) NOT(r0,r1) +# define bitswap(r0, r1) _bitswap(_jit, r0, r1) +static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# if __WORDSIZE == 32 +# define clzr(r0, r1) CNTLZW(r0, r1) +# else +# define clzr(r0, r1) CNTLZD(r0, r1) +# endif +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define 
extr_c(r0,r1) EXTSB(r0,r1) # define extr_uc(r0,r1) ANDI_(r0,r1,0xff) # define extr_s(r0,r1) EXTSH(r0,r1) @@ -858,14 +884,14 @@ static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused; # define callr(r0,i0) _callr(_jit,r0,i0) static void _callr(jit_state_t*,jit_int32_t,jit_int32_t); # define calli(i0,i1) _calli(_jit,i0,i1) -static void _calli(jit_state_t*,jit_word_t,jit_int32_t); +static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t); # define calli_p(i0,i1) _calli_p(_jit,i0,i1) static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t); # else # define callr(r0) _callr(_jit,r0) static void _callr(jit_state_t*,jit_int32_t); # define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); #endif @@ -1125,7 +1151,7 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) static void _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPWI(r2, 0); + CMPXI(r2, 0); BEQ(8); MR(r0, r1); } @@ -1133,7 +1159,7 @@ _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) static void _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPWI(r2, 0); + CMPXI(r2, 0); BNE(8); MR(r0, r1); } @@ -1194,6 +1220,94 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_unget_reg(r1_reg); } +/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */ +/* +unsigned int v; // 32-bit word to reverse bit order + +// swap odd and even bits +v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); +// swap consecutive pairs +v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); +// swap nibbles ... 
+v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); +// swap bytes +v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); +// swap 2-byte long pairs +v = ( v >> 16 ) | ( v << 16); + */ +static void +_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1, t2, t3, t4; + movr(r0, r1); + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + t2 = jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); + rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); + rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); + rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 
0x00ff00ffL : 0x00ff00ff00ff00ffL); + rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# if __WORDSIZE == 32 + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + lshi(rn(t2), r0, 16); /* t2 = v << 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# else + movi(rn(t0), 0x0000ffff0000ffffL); + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ + lshi(rn(t2), r0, 32); /* t2 = v << 32 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# endif + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + clzr(r0, r0); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bitswap(r0, r1); + clor(r0, r0); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + bitswap(r0, r1); + clzr(r0, r0); +} + static void _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag) { @@ -1627,7 +1741,7 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); MFCR(r0); EXTRWI(r0, r0, 1, CR_LT); } @@ -1637,11 +1751,11 @@ _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } MFCR(r0); @@ -1675,7 +1789,7 @@ _lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static 
void _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); CRNOT(CR_GT, CR_GT); MFCR(r0); EXTRWI(r0, r0, 1, CR_GT); @@ -1686,11 +1800,11 @@ _lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } CRNOT(CR_GT, CR_GT); @@ -1727,7 +1841,7 @@ _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); MFCR(r0); EXTRWI(r0, r0, 1, CR_EQ); } @@ -1737,13 +1851,13 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else if (can_zero_extend_short_p(i0)) CMPLWI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } MFCR(r0); @@ -1753,7 +1867,7 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); CRNOT(CR_LT, CR_LT); MFCR(r0); EXTRWI(r0, r0, 1, CR_LT); @@ -1764,11 +1878,11 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } CRNOT(CR_LT, CR_LT); @@ -1805,7 +1919,7 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); MFCR(r0); EXTRWI(r0, r0, 1, CR_GT); } @@ -1815,11 +1929,11 @@ _gti(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } MFCR(r0); @@ -1853,7 +1967,7 @@ _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) static void _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { - CMPW(r1, r2); + CMPX(r1, r2); CRNOT(CR_EQ, CR_EQ); MFCR(r0); EXTRWI(r0, r0, 1, CR_EQ); @@ -1864,13 +1978,13 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; if (can_sign_extend_short_p(i0)) - CMPWI(r1, i0); + CMPXI(r1, i0); else if (can_zero_extend_short_p(i0)) CMPLWI(r1, i0); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); - CMPW(r1, rn(reg)); + CMPX(r1, rn(reg)); jit_unget_reg(reg); } CRNOT(CR_EQ, CR_EQ); @@ -1882,7 +1996,7 @@ static jit_word_t _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BLT(d); @@ -1895,11 +2009,11 @@ _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -1942,7 +2056,7 @@ static jit_word_t _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BLE(d); @@ -1955,11 +2069,11 @@ _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = 
_jit->pc.w; @@ -2002,7 +2116,7 @@ static jit_word_t _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BEQ(d); @@ -2015,13 +2129,13 @@ _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else if (can_zero_extend_short_p(i1)) CMPLWI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -2034,7 +2148,7 @@ static jit_word_t _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BGE(d); @@ -2047,11 +2161,11 @@ _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -2094,7 +2208,7 @@ static jit_word_t _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BGT(d); @@ -2107,11 +2221,11 @@ _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -2154,7 +2268,7 @@ static jit_word_t _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { jit_word_t d, w; - CMPW(r0, r1); + CMPX(r0, r1); w = _jit->pc.w; d = (i0 - w) & ~3; BNE(d); @@ -2167,13 +2281,13 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) 
jit_int32_t reg; jit_word_t d, w; if (can_sign_extend_short_p(i1)) - CMPWI(r0, i1); + CMPXI(r0, i1); else if (can_zero_extend_short_p(i1)) CMPLWI(r0, i1); else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i1); - CMPW(r0, rn(reg)); + CMPX(r0, rn(reg)); jit_unget_reg(reg); } w = _jit->pc.w; @@ -2772,7 +2886,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) jit_int32_t reg; if (r1 == _R0_REGNO) { if (r2 != _R0_REGNO) - LWZX(r0, r2, r1); + LWAX(r0, r2, r1); else { reg = jit_get_reg(jit_class_gpr); movr(rn(reg), r1); @@ -2781,7 +2895,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } } else - LWZX(r0, r1, r2); + LWAX(r0, r1, r2); } static void @@ -3301,24 +3415,28 @@ _callr(jit_state_t *_jit, jit_int32_t r0 } /* assume fixed address or reachable address */ -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0 # if _CALL_SYSV , jit_int32_t varargs # endif ) { + jit_word_t w; # if _CALL_SYSV jit_word_t d; d = (i0 - _jit->pc.w - !!varargs * 4) & ~3; if (can_sign_extend_jump_p(d)) { - /* Tell double arguments were passed in registers. */ - if (varargs) - CREQV(6, 6, 6); - BL(d); - } else + /* Tell double arguments were passed in registers. 
*/ + if (varargs) + CREQV(6, 6, 6); + w = _jit->pc.w; + BL(d); + } + else # endif { + w = _jit->pc.w; movi(_R12_REGNO, i0); callr(_R12_REGNO # if _CALL_SYSV @@ -3326,6 +3444,7 @@ _calli(jit_state_t *_jit, jit_word_t i0 # endif ); } + return (w); } /* absolute jump */ @@ -3649,7 +3768,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) if (!can_sign_extend_short_p(d)) { /* use absolute address */ assert(can_sign_extend_short_p(label)); - d |= 2; + d = label | 2; } u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe); break; @@ -3677,9 +3796,9 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) if (!can_sign_extend_jump_p(d)) { /* use absolute address */ assert(can_sign_extend_jump_p(label)); - d |= 2; + d = label | 2; } - u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe); + u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd); break; case 15: /* LI */ #if __WORDSIZE == 32 diff --git a/deps/lightning/lib/jit_ppc-fpu.c b/deps/lightning/lib/jit_ppc-fpu.c index a2edbd89..12631cd4 100644 --- a/deps/lightning/lib/jit_ppc-fpu.c +++ b/deps/lightning/lib/jit_ppc-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* diff --git a/deps/lightning/lib/jit_ppc-sz.c b/deps/lightning/lib/jit_ppc-sz.c index 212e6372..c8a4471e 100644 --- a/deps/lightning/lib/jit_ppc-sz.c +++ b/deps/lightning/lib/jit_ppc-sz.c @@ -1,22 +1,26 @@ #if __WORDSIZE == 32 #if defined(__powerpc__) #if __BYTE_ORDER == __BIG_ENDIAN -#if _CALL_SYSV -#define JIT_INSTR_MAX 124 +#if !_CALL_SYSV +#define JIT_INSTR_MAX 136 0, /* data */ 0, /* live */ - 0, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 124, /* prolog */ + 136, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -24,11 +28,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 36, /* va_start */ - 52, /* va_arg */ - 64, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 4, /* va_start */ + 8, /* va_arg */ + 8, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -99,12 +115,17 @@ 8, /* movi */ 12, /* movnr */ 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -194,16 +215,40 @@ 16, /* bxsubi_u */ 8, /* jmpr */ 4, /* jmpi */ - 12, /* callr */ - 20, /* calli */ + 28, /* callr */ + 36, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ 
+ 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -352,7 +397,7 @@ 36, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 24, /* movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ 8, /* ldi_d */ 4, /* ldxr_d */ @@ -403,12 +448,11 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 8, /* bswapr_us */ - 16, /* bswapr_ui */ - 0, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ -#endif /* _CALL_SYSV */ + 8, /* clo */ + 4, /* clz */ + 136, /* cto */ + 132, /* ctz */ +#endif /* !_CALL_SYSV */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -416,22 +460,26 @@ #if __WORDSIZE == 32 #if defined(__powerpc__) #if __BYTE_ORDER == __BIG_ENDIAN -#if !_CALL_SYSV +#if _CALL_SYSV #define JIT_INSTR_MAX 136 0, /* data */ 0, /* live */ - 0, /* align */ + 28, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ - 136, /* prolog */ + 124, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -439,11 +487,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 4, /* va_start */ - 8, /* va_arg */ - 8, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 
0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 36, /* va_start */ + 52, /* va_arg */ + 64, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -477,7 +537,7 @@ 12, /* remr_u */ 20, /* remi_u */ 4, /* andr */ - 12, /* andi */ + 4, /* andi */ 4, /* orr */ 12, /* ori */ 4, /* xorr */ @@ -512,14 +572,19 @@ 16, /* nei */ 4, /* movr */ 8, /* movi */ - 12, /* movnr */ - 12, /* movzr */ + 12, /* movnr */ + 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -609,16 +674,40 @@ 16, /* bxsubi_u */ 8, /* jmpr */ 4, /* jmpi */ - 28, /* callr */ - 40, /* calli */ + 12, /* callr */ + 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -673,7 +762,7 @@ 24, /* unordi_f */ 12, /* truncr_f_i */ 0, /* truncr_f_l */ - 20, /* extr_f */ + 36, /* extr_f */ 4, /* extr_d_f */ 4, /* movr_f */ 12, /* movi_f */ @@ -764,10 +853,10 @@ 32, /* unordi_d */ 12, /* truncr_d_i */ 0, /* truncr_d_l */ - 20, /* extr_d */ + 36, /* extr_d */ 4, /* extr_f_d */ 4, /* movr_d */ - 24, /* 
movi_d */ + 28, /* movi_d */ 4, /* ldr_d */ 8, /* ldi_d */ 4, /* ldxr_d */ @@ -818,25 +907,25 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ - 16, /* bswapr_ui */ - 0, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ -#endif /* _CALL_AIX */ -#endif /* __BYTEORDER */ + 8, /* clo */ + 4, /* clz */ + 136, /* cto */ + 132, /* ctz */ +#endif /* _CALL_SYSV */ +#endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 #if defined(__powerpc__) #if __BYTE_ORDER == __BIG_ENDIAN -#define JIT_INSTR_MAX 148 +#define JIT_INSTR_MAX 236 0, /* data */ 0, /* live */ - 4, /* align */ + 28, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -845,7 +934,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -853,8 +945,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -928,12 +1032,17 @@ 36, /* movi */ 12, /* movnr */ 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 4, /* htonr_ul */ @@ -1026,13 +1135,37 @@ 28, /* callr */ 52, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 
0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1232,11 +1365,10 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 8, /* bswapr_us */ - 16, /* bswapr_ui */ - 44, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ + 8, /* clo */ + 4, /* clz */ + 236, /* cto */ + 232, /* ctz */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ @@ -1244,12 +1376,13 @@ #if __WORDSIZE == 64 #if defined(__powerpc__) #if __BYTE_ORDER == __LITTLE_ENDIAN -#define JIT_INSTR_MAX 124 +#define JIT_INSTR_MAX 236 0, /* data */ 0, /* live */ - 4, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -1258,7 +1391,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -1266,8 +1402,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -1341,12 +1489,17 @@ 36, /* movi */ 12, /* movnr */ 12, /* movzr */ + 36, /* casr */ + 44, /* casi */ 4, /* extr_c */ 
4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 8, /* bswapr_us */ + 16, /* bswapr_ui */ + 44, /* bswapr_ul */ 8, /* htonr_us */ 16, /* htonr_ui */ 44, /* htonr_ul */ @@ -1439,13 +1592,37 @@ 12, /* callr */ 32, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1645,11 +1822,10 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 8, /* bswapr_us */ - 16, /* bswapr_ui */ - 44, /* bswapr_ul */ - 36, /* casr */ - 44, /* casi */ + 8, /* clo */ + 4, /* clz */ + 236, /* cto */ + 232, /* ctz */ #endif /* __BYTE_ORDER */ #endif /* __powerpc__ */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_ppc.c b/deps/lightning/lib/jit_ppc.c index 5d2b74b1..869e876e 100644 --- a/deps/lightning/lib/jit_ppc.c +++ b/deps/lightning/lib/jit_ppc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -291,20 +291,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -364,7 +362,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -404,12 +402,16 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; jit_bool_t incr = 1; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; #if _CALL_SYSV @@ -420,7 +422,7 @@ _jit_arg(jit_state_t *_jit) offset = _jitc->function->self.size; if (incr) _jitc->function->self.size += sizeof(jit_word_t); - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -498,7 +500,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - v->u.w); 
@@ -510,7 +512,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); @@ -522,7 +524,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); @@ -534,7 +536,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); @@ -546,7 +548,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { #if __WORDSIZE == 32 @@ -564,7 +566,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); @@ -576,7 +578,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); @@ -587,10 +589,10 @@ 
_jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); else @@ -599,11 +601,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - jit_inc_synth_wp(putargi, u, v); - assert(v->code == jit_code_arg); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { @@ -698,11 +700,11 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { jit_bool_t incr = 1; assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -719,12 +721,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; jit_bool_t incr = 1; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -1153,6 +1155,7 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1293,6 +1296,9 @@ _emit_code(jit_state_t *_jit) if 
((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1368,6 +1374,10 @@ _emit_code(jit_state_t *_jit) # endif case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case jit_code_casr: casr(rn(node->u.w), rn(node->v.w), rn(node->w.q.l), rn(node->w.q.h)); @@ -1691,7 +1701,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (can_sign_extend_jump_p(word)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1699,36 +1714,45 @@ _emit_code(jit_state_t *_jit) jmpi(node->u.w); break; case jit_code_callr: - callr(rn(node->u.w) #if _CALL_SYSV - , !!(node->flag & jit_flag_varargs) +# define xcallr(u, v) callr(u, v) +# define xcalli_p(u, v) calli_p(u, v) +# define xcalli(u, v) calli(u, v) +#else +# define xcallr(u, v) callr(u) +# define xcalli_p(u, v) calli_p(u) +# define xcalli(u, v) calli(u) #endif - ); + xcallr(rn(node->u.w), !!(node->flag & jit_flag_varargs)); break; case jit_code_calli: + value = !!(node->flag & jit_flag_varargs); if (node->flag & jit_flag_node) { temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = calli_p(temp->u.w + if (temp->flag & jit_flag_patch) + xcalli(temp->u.w, value); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); #if _CALL_SYSV - , !!(node->flag & jit_flag_varargs) + if (can_sign_extend_jump_p(word + value * 4)) + word = xcalli(_jit->pc.w, value); + else #endif - ); - if (!(temp->flag & jit_flag_patch)) + word = xcalli_p(_jit->pc.w, value); patch(word, node); + } } else - calli(node->u.w -#if _CALL_SYSV - , !!(node->flag & jit_flag_varargs) -#endif - ); + xcalli(node->u.w, value); break; case jit_code_prolog: 
_jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1772,6 +1796,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1796,14 +1830,26 @@ _emit_code(jit_state_t *_jit) case jit_code_va_arg_d: vaarg_d(rn(node->u.w), rn(node->v.w)); break; - case jit_code_live: - case jit_code_arg: case jit_code_ellipsis: + case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1813,10 +1859,26 @@ _emit_code(jit_state_t *_jit) case 
jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: diff --git a/deps/lightning/lib/jit_print.c b/deps/lightning/lib/jit_print.c index a6f93380..f3409fbb 100644 --- a/deps/lightning/lib/jit_print.c +++ b/deps/lightning/lib/jit_print.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -22,8 +22,12 @@ #if __WORDSIZE == 32 # define MININT 0x80000000 +# define DEC_FMT "%d" +# define HEX_FMT "0x%x" #else # define MININT 0x8000000000000000 +# define DEC_FMT "%ld" +# define HEX_FMT "0x%lx" #endif @@ -31,11 +35,11 @@ #define print_hex(value) \ do { \ if (value < 0 && value != MININT) \ - fprintf(print_stream, "-0x%lx", -value); \ + fprintf(print_stream, "-" HEX_FMT, (jit_uword_t)-value); \ else \ - fprintf(print_stream, "0x%lx", value); \ + fprintf(print_stream, HEX_FMT, (jit_uword_t)value); \ } while (0) -#define print_dec(value) fprintf(print_stream, "%ld", value) +#define print_dec(value) fprintf(print_stream, DEC_FMT, value) #define print_flt(value) fprintf(print_stream, "%g", value) #define print_str(value) fprintf(print_stream, "%s", value) #define print_ptr(value) fprintf(print_stream, "%p", value) diff --git a/deps/lightning/lib/jit_rewind.c b/deps/lightning/lib/jit_rewind.c index 89e94916..8da80212 100644 --- a/deps/lightning/lib/jit_rewind.c +++ b/deps/lightning/lib/jit_rewind.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2022 Free Software Foundation, Inc. + * Copyright (C) 2015-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -54,13 +54,9 @@ _rewind_prolog(jit_state_t *_jit) _jitc->function->self.size = stack_framesize; #if __arm__ assert(jit_cpu.abi); - _jitc->function->self.size += 64; -#endif -#if __mips__ && NEW_ABI - /* Only add extra stack space if there are varargs - * arguments in registers. 
*/ - assert(jit_arg_reg_p(_jitc->function->self.argi)); - _jitc->function->self.size += 64; + _jitc->function->alist = NULL; +#elif __mips__ + _jitc->function->alist = NULL; #endif _jitc->function->self.argi = _jitc->function->self.argf = _jitc->function->self.argn = 0; @@ -71,9 +67,10 @@ _rewind_prolog(jit_state_t *_jit) for (; node; node = next) { next = node->next; switch (node->code) { - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: node->next = (jit_node_t *)0; - jit_make_arg(node); + jit_make_arg(node, node->code); break; case jit_code_arg_f: node->next = (jit_node_t *)0; diff --git a/deps/lightning/lib/jit_riscv-cpu.c b/deps/lightning/lib/jit_riscv-cpu.c index 2ae11b92..4fd35a8f 100644 --- a/deps/lightning/lib/jit_riscv-cpu.c +++ b/deps/lightning/lib/jit_riscv-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Free Software Foundation, Inc. + * Copyright (C) 2019-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -93,10 +93,6 @@ typedef union { # undef ui } instr_t; # define ii(i) *_jit->pc.ui++ = i -/* FIXME could jit_rewind_prolog() to only use extra 64 bytes - * if a variadic jit function that have variadic arguments in - * registers */ -# define stack_framesize (200 + 64) # define ldr(r0, r1) ldr_l(r0, r1) # define ldi(r0, im) ldi_l(r0, im) # define ldxr(r0, r1, r2) ldxr_l(r0, r1, r2) @@ -579,12 +575,12 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t); # define jmpr(r0) JALR(_ZERO_REGNO, r0, 0) # define jmpi(im) _jmpi(_jit, im) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(im) _jmpi_p(_jit, im) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) JALR(_RA_REGNO, r0, 0) # define calli(im) _calli(_jit, im) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(im) _calli_p(_jit, im) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(i0) _prolog(_jit,i0) @@ -2087,12 +2083,13 @@ _bmci(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0) return (w); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { jit_int32_t t0; - jit_word_t dsp; - dsp = i0 - _jit->pc.w; + jit_word_t dsp, w; + w = _jit->pc.w; + dsp = i0 - w; if (simm20_p(dsp)) JAL(_ZERO_REGNO, dsp); else { @@ -2101,6 +2098,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(t0)); jit_unget_reg(t0); } + return (w); } static jit_word_t @@ -2115,12 +2113,13 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0) return (w); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { jit_int32_t t0; - jit_word_t dsp; - dsp = i0 - _jit->pc.w; + jit_word_t dsp, w; + w = _jit->pc.w; + dsp = i0 - w; if (simm20_p(dsp)) JAL(_RA_REGNO, dsp); else { @@ -2129,6 +2128,7 @@ _calli(jit_state_t *_jit, jit_word_t i0) callr(rn(t0)); 
jit_unget_reg(t0); } + return (w); } static jit_word_t @@ -2146,9 +2146,10 @@ _calli_p(jit_state_t *_jit, jit_word_t i0) static void _prolog(jit_state_t *_jit, jit_node_t *node) { - jit_int32_t reg; + jit_int32_t reg, offs; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); if (_jitc->function->assume_frame) return; @@ -2159,56 +2160,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node) _jitc->function->stack = ((_jitc->function->self.alen - /* align stack at 16 bytes */ _jitc->function->self.aoff) + 15) & -16; - subi(_SP_REGNO, _SP_REGNO, stack_framesize); - stxi(0, _SP_REGNO, _RA_REGNO); - stxi(8, _SP_REGNO, _FP_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _S1)) - stxi(16, _SP_REGNO, 9); - if (jit_regset_tstbit(&_jitc->function->regset, _S2)) - stxi(24, _SP_REGNO, 18); - if (jit_regset_tstbit(&_jitc->function->regset, _S3)) - stxi(32, _SP_REGNO, 19); - if (jit_regset_tstbit(&_jitc->function->regset, _S4)) - stxi(40, _SP_REGNO, 20); - if (jit_regset_tstbit(&_jitc->function->regset, _S5)) - stxi(48, _SP_REGNO, 21); - if (jit_regset_tstbit(&_jitc->function->regset, _S6)) - stxi(56, _SP_REGNO, 22); - if (jit_regset_tstbit(&_jitc->function->regset, _S7)) - stxi(64, _SP_REGNO, 23); - if (jit_regset_tstbit(&_jitc->function->regset, _S8)) - stxi(72, _SP_REGNO, 24); - if (jit_regset_tstbit(&_jitc->function->regset, _S9)) - stxi(80, _SP_REGNO, 25); - if (jit_regset_tstbit(&_jitc->function->regset, _S10)) - stxi(88, _SP_REGNO, 26); - if (jit_regset_tstbit(&_jitc->function->regset, _S11)) - stxi(96, _SP_REGNO, 27); - if (jit_regset_tstbit(&_jitc->function->regset, _FS0)) - stxi_d(104, _SP_REGNO, 8); - if (jit_regset_tstbit(&_jitc->function->regset, _FS1)) - stxi_d(112, _SP_REGNO, 9); - if (jit_regset_tstbit(&_jitc->function->regset, _FS2)) - stxi_d(120, _SP_REGNO, 18); - if (jit_regset_tstbit(&_jitc->function->regset, _FS3)) - 
stxi_d(128, _SP_REGNO, 19); - if (jit_regset_tstbit(&_jitc->function->regset, _FS4)) - stxi_d(136, _SP_REGNO, 20); - if (jit_regset_tstbit(&_jitc->function->regset, _FS5)) - stxi_d(144, _SP_REGNO, 21); - if (jit_regset_tstbit(&_jitc->function->regset, _FS6)) - stxi_d(152, _SP_REGNO, 22); - if (jit_regset_tstbit(&_jitc->function->regset, _FS7)) - stxi_d(160, _SP_REGNO, 23); - if (jit_regset_tstbit(&_jitc->function->regset, _FS8)) - stxi_d(168, _SP_REGNO, 24); - if (jit_regset_tstbit(&_jitc->function->regset, _FS9)) - stxi_d(176, _SP_REGNO, 25); - if (jit_regset_tstbit(&_jitc->function->regset, _FS10)) - stxi_d(184, _SP_REGNO, 26); - if (jit_regset_tstbit(&_jitc->function->regset, _FS11)) - stxi_d(192, _SP_REGNO, 27); - movr(_FP_REGNO, _SP_REGNO); + + if (_jitc->function->stack) + _jitc->function->need_stack = 1; + if (!_jitc->function->need_frame && !_jitc->function->need_stack) { + /* check if any callee save register needs to be saved */ + for (reg = 0; reg < _jitc->reglen; ++reg) + if (jit_regset_tstbit(&_jitc->function->regset, reg) && + (_rvs[reg].spec & jit_class_sav)) { + _jitc->function->need_stack = 1; + break; + } + } + + if (_jitc->function->need_frame || _jitc->function->need_stack) + subi(_SP_REGNO, _SP_REGNO, jit_framesize()); + if (_jitc->function->need_frame) { + stxi(0, _SP_REGNO, _RA_REGNO); + stxi(8, _SP_REGNO, _FP_REGNO); + } + /* callee save registers */ + for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + stxi(offs, _SP_REGNO, rn(iregs[reg])); + offs += sizeof(jit_word_t); + } + } + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + stxi_d(offs, _SP_REGNO, rn(fregs[reg])); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->need_frame) + movr(_FP_REGNO, _SP_REGNO); if (_jitc->function->stack) subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack); if (_jitc->function->allocar) { @@ -2219,7 +2205,7 @@ 
_prolog(jit_state_t *_jit, jit_node_t *node) } if (_jitc->function->self.call & jit_call_varargs) { for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) - stxi(stack_framesize - ((8 - reg) * 8), + stxi(jit_framesize() - ((8 - reg) * 8), _FP_REGNO, rn(JIT_RA0 - reg)); } } @@ -2227,58 +2213,31 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg, offs; if (_jitc->function->assume_frame) return; - movr(_SP_REGNO, _FP_REGNO); - ldxi(_RA_REGNO, _SP_REGNO, 0); - ldxi(_FP_REGNO, _SP_REGNO, 8); - if (jit_regset_tstbit(&_jitc->function->regset, _S1)) - ldxi(9, _SP_REGNO, 16); - if (jit_regset_tstbit(&_jitc->function->regset, _S2)) - ldxi(18, _SP_REGNO, 24); - if (jit_regset_tstbit(&_jitc->function->regset, _S3)) - ldxi(19, _SP_REGNO, 32); - if (jit_regset_tstbit(&_jitc->function->regset, _S4)) - ldxi(20, _SP_REGNO, 40); - if (jit_regset_tstbit(&_jitc->function->regset, _S5)) - ldxi(21, _SP_REGNO, 48); - if (jit_regset_tstbit(&_jitc->function->regset, _S6)) - ldxi(22, _SP_REGNO, 56); - if (jit_regset_tstbit(&_jitc->function->regset, _S7)) - ldxi(23, _SP_REGNO, 64); - if (jit_regset_tstbit(&_jitc->function->regset, _S8)) - ldxi(24, _SP_REGNO, 72); - if (jit_regset_tstbit(&_jitc->function->regset, _S9)) - ldxi(25, _SP_REGNO, 80); - if (jit_regset_tstbit(&_jitc->function->regset, _S10)) - ldxi(26, _SP_REGNO, 88); - if (jit_regset_tstbit(&_jitc->function->regset, _S11)) - ldxi(27, _SP_REGNO, 96); - if (jit_regset_tstbit(&_jitc->function->regset, _FS0)) - ldxi_d(8, _SP_REGNO, 104); - if (jit_regset_tstbit(&_jitc->function->regset, _FS1)) - ldxi_d(9, _SP_REGNO, 112); - if (jit_regset_tstbit(&_jitc->function->regset, _FS2)) - ldxi_d(18, _SP_REGNO, 120); - if (jit_regset_tstbit(&_jitc->function->regset, _FS3)) - ldxi_d(19, _SP_REGNO, 128); - if (jit_regset_tstbit(&_jitc->function->regset, _FS4)) - ldxi_d(20, _SP_REGNO, 136); - if (jit_regset_tstbit(&_jitc->function->regset, _FS5)) - ldxi_d(21, _SP_REGNO, 
144); - if (jit_regset_tstbit(&_jitc->function->regset, _FS6)) - ldxi_d(22, _SP_REGNO, 152); - if (jit_regset_tstbit(&_jitc->function->regset, _FS7)) - ldxi_d(23, _SP_REGNO, 160); - if (jit_regset_tstbit(&_jitc->function->regset, _FS8)) - ldxi_d(24, _SP_REGNO, 168); - if (jit_regset_tstbit(&_jitc->function->regset, _FS9)) - ldxi_d(25, _SP_REGNO, 176); - if (jit_regset_tstbit(&_jitc->function->regset, _FS10)) - ldxi_d(26, _SP_REGNO, 184); - if (jit_regset_tstbit(&_jitc->function->regset, _FS11)) - ldxi_d(27, _SP_REGNO, 192); - addi(_SP_REGNO, _SP_REGNO, stack_framesize); + if (_jitc->function->need_frame) { + movr(_SP_REGNO, _FP_REGNO); + ldxi(_RA_REGNO, _SP_REGNO, 0); + ldxi(_FP_REGNO, _SP_REGNO, 8); + } + + /* callee save registers */ + for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + ldxi(rn(iregs[reg]), _SP_REGNO, offs); + offs += sizeof(jit_word_t); + } + } + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + ldxi_d(rn(fregs[reg]), _SP_REGNO, offs); + offs += sizeof(jit_float64_t); + } + } + + if (_jitc->function->need_frame || _jitc->function->need_stack) + addi(_SP_REGNO, _SP_REGNO, jit_framesize()); RET(); } @@ -2288,9 +2247,9 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) assert(_jitc->function->self.call & jit_call_varargs); /* Initialize va_list to the first stack argument. 
*/ if (jit_arg_reg_p(_jitc->function->vagp)) - addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8)); + addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8)); else - addi(r0, _FP_REGNO, _jitc->function->self.size); + addi(r0, _FP_REGNO, jit_selfsize()); } static void @@ -2333,7 +2292,6 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) } else abort(); - i.w = u.i[1]; assert(i.I.opcode == 3 && i.I.funct3 == 3); /* LD */ } # else diff --git a/deps/lightning/lib/jit_riscv-fpu.c b/deps/lightning/lib/jit_riscv-fpu.c index e7884cb9..89346e08 100644 --- a/deps/lightning/lib/jit_riscv-fpu.c +++ b/deps/lightning/lib/jit_riscv-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Free Software Foundation, Inc. + * Copyright (C) 2019-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_riscv-sz.c b/deps/lightning/lib/jit_riscv-sz.c index 8c4cf048..335d3cfe 100644 --- a/deps/lightning/lib/jit_riscv-sz.c +++ b/deps/lightning/lib/jit_riscv-sz.c @@ -1,10 +1,11 @@ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 116 +#define JIT_INSTR_MAX 168 0, /* data */ 0, /* live */ 4, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,8 +25,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -96,12 +112,17 @@ 12, /* movi */ 12, /* 
movnr */ 12, /* movzr */ + 28, /* casr */ + 40, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 4, /* extr_i */ 8, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ 20, /* htonr_us */ 52, /* htonr_ui */ 116, /* htonr_ul */ @@ -194,13 +215,37 @@ 4, /* callr */ 16, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -394,15 +439,14 @@ 4, /* movr_w_f */ 0, /* movr_ww_d */ 4, /* movr_w_d */ - 0, /* movr_f_w */ + 4, /* movr_f_w */ 4, /* movi_f_w */ 0, /* movr_d_ww */ 0, /* movi_d_ww */ 4, /* movr_d_w */ 12, /* movi_d_w */ - 20, /* bswapr_us */ - 52, /* bswapr_ui */ - 116, /* bswapr_ul */ - 28, /* casr */ - 40, /* casi */ + 168, /* clo */ + 148, /* clz */ + 168, /* cto */ + 148, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_riscv.c b/deps/lightning/lib/jit_riscv.c index 8828d4ab..63a5cd9b 100644 --- a/deps/lightning/lib/jit_riscv.c +++ b/deps/lightning/lib/jit_riscv.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2022 Free Software Foundation, Inc. + * Copyright (C) 2019-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -17,6 +17,10 @@ * Paulo Cesar Pereira de Andrade */ +/* callee save + variadic arguments + * align16(ra+fp+s[1-9]+s10+s11+fs[0-9]+fs10+fs11)+align16(a[0-7]) */ +#define stack_framesize (208 + 64) + #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 8) #define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) @@ -28,6 +32,8 @@ typedef jit_pointer_t jit_va_list_t; /* * Prototypes */ +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); #if __WORDSIZE == 64 # define load_const(r0, i0) _load_const(_jit, r0, i0) static void _load_const(jit_state_t*, jit_int32_t, jit_word_t); @@ -43,6 +49,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef PROTO /* @@ -119,6 +126,14 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { + _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10, _S11 +}; + +static jit_int32_t fregs[] = { + _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7, _FS8, _FS9, _FS10, _FS11 +}; + /* * Implementation */ @@ -180,6 +195,7 @@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + jit_check_frame(); switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -228,20 +244,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -301,16 +315,17 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, 
jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); - return (jit_arg_f_reg_p(u->u.w)); + return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8)); } void _jit_ellipsis(jit_state_t *_jit) { jit_inc_synth(ellipsis); + jit_check_frame(); if (_jitc->prepare) { jit_link_prepare(); assert(!(_jitc->function->call.call & jit_call_varargs)); @@ -334,19 +349,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -368,6 +387,7 @@ _jit_arg_f(jit_state_t *_jit) else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); @@ -391,6 +411,7 @@ _jit_arg_d(jit_state_t *_jit) else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); @@ -401,111 +422,129 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - 
v->u.w); - else - jit_ldxi_c(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_c(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); - else - jit_ldxi_uc(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_uc(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); - else - jit_ldxi_s(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_s(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); - else - jit_ldxi_us(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_us(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_i(u, JIT_RA0 - v->u.w); - else - jit_ldxi_i(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_i(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); - else - 
jit_ldxi_ui(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_ui(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); - else - jit_ldxi_l(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_l(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); - else - jit_stxi(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w, JIT_FP, regno); + node = jit_stxi(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -520,8 +559,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(u, JIT_FA0 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8)); - else - jit_ldxi_f(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_f(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -534,8 +575,10 @@ 
_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(JIT_FA0 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u); - else - jit_stxi_f(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_f(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -547,18 +590,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) jit_inc_synth_fp(putargi_f, u, v); if (jit_arg_f_reg_p(v->u.w)) jit_movi_f(JIT_FA0 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - union { - jit_float32_t f; - jit_int32_t i; - } uu; - uu.f = u; - jit_movi(JIT_RA0 - (v->u.w - 8), uu.i); - } + else if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, JIT_FP, regno); + node = jit_stxi_f(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -573,8 +612,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(u, JIT_FA0 - v->u.w); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8)); - else - jit_ldxi_d(u, JIT_FP, v->u.w); + else { + jit_node_t *node = jit_ldxi_d(u, JIT_FP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -587,8 +628,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(JIT_FA0 - v->u.w, u); else if (jit_arg_reg_p(v->u.w - 8)) jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u); - else - jit_stxi_d(v->u.w, JIT_FP, u); + else { + jit_node_t *node = jit_stxi_d(v->u.w, JIT_FP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -600,28 +643,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) jit_inc_synth_dp(putargi_d, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi_d(JIT_FA0 - v->u.w, u); - else if (jit_arg_reg_p(v->u.w - 8)) { - union { - jit_float64_t d; - jit_int64_t w; - } uu; - uu.d = u; - jit_movi(JIT_RA0 - (v->u.w - 8), uu.w); - } + else 
if (jit_arg_reg_p(v->u.w - 8)) + jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u); else { + jit_node_t *node; regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, JIT_FP, regno); + node = jit_stxi_d(v->u.w, JIT_FP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(JIT_RA0 - _jitc->function->call.argi, u); @@ -630,16 +669,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) else { jit_stxi(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(JIT_RA0 - _jitc->function->call.argi, u); @@ -651,6 +691,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) jit_stxi(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -673,6 +714,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) else { jit_stxi_f(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -699,6 +741,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_stxi_f(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -721,6 +764,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) else { 
jit_stxi_d(_jitc->function->call.size, JIT_SP, u); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -747,6 +791,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_d(_jitc->function->call.size, JIT_SP, regno); jit_unget_reg(regno); _jitc->function->call.size += sizeof(jit_word_t); + jit_check_frame(); } jit_dec_synth(); } @@ -775,6 +820,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishr, r0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -792,6 +838,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishi, (jit_word_t)i0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; @@ -892,6 +939,7 @@ _emit_code(jit_state_t *_jit) jit_node_t *node; jit_uint8_t *data; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1066,6 +1114,9 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1113,6 +1164,14 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); +#define clor(r0, r1) fallback_clo(r0, r1) +#define clzr(r0, r1) fallback_clz(r0, r1) +#define ctor(r0, r1) fallback_cto(r0, r1) +#define ctzr(r0, r1) fallback_ctz(r0, r1) + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1434,6 +1493,7 @@ _emit_code(jit_state_t *_jit) case_brr(bunord, _d); case_brd(bunord); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); break; case jit_code_jmpi: @@ -1444,14 +1504,22 @@ 
_emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (simm20_p(word)) + word = jmpi(_jit->pc.w); + else word = jmpi_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); jmpi(node->u.w); + } break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: @@ -1462,22 +1530,32 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (simm20_p(word)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); calli(node->u.w); + } break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif undo.patch_offset = _jitc->patches.offset; restart_function: + compute_framesize(); + patch_alist(0); _jitc->again = 0; prolog(node); break; @@ -1493,10 +1571,25 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + /* this will be recomputed but undo anyway to have it + * better self documented.*/ + undo.func.need_stack = _jitc->function->need_stack; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif _jitc->patches.offset = undo.patch_offset; + patch_alist(1); goto restart_function; } /* remember label is defined */ @@ -1537,11 +1630,19 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: + case jit_code_arg_l: case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1549,10 +1650,22 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_i: case jit_code_getarg_ui: case jit_code_getarg_l: case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: 
case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1659,6 +1772,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_riscv-cpu.c" # include "jit_riscv-fpu.c" +# include "jit_fallback.c" #undef CODE static void @@ -1806,6 +1920,30 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) stxi_d(i0, rn(r0), rn(r1)); } +#if __WORDSIZE != 64 +# error "only 64 bit ports tested" +#endif +static void +_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + _jitc->framesize = 16; /* ra+fp */ + for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += sizeof(jit_word_t); + + for (reg = 0; reg < jit_size(fregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) + _jitc->framesize += sizeof(jit_float64_t); + + /* Space to store variadic arguments */ + if (_jitc->function->self.call & jit_call_varargs) + _jitc->framesize += (8 - _jitc->function->vagp) * 8; + + /* Make sure functions called have a 16 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 15) & -16; +} + static void _patch(jit_state_t *_jit, jit_word_t instr, 
jit_node_t *node) { diff --git a/deps/lightning/lib/jit_s390-cpu.c b/deps/lightning/lib/jit_s390-cpu.c index 55b7e1fe..2e9e074f 100644 --- a/deps/lightning/lib/jit_s390-cpu.c +++ b/deps/lightning/lib/jit_s390-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -394,6 +394,8 @@ static void _nop(jit_state_t*,jit_int32_t); # define EAR(R1,R2) RRE_(0xB24F,R1,R2) /* EXTRACT PSW */ # define EPSW(R1,R2) RRE_(0xB98D,R1,R2) +/* FIND LEFTMOST ONE */ +# define FLOGR(R1,R2) RRE_(0xB983,R1,R2) /* INSERT CHARACTER */ # define IC(R1,D2,X2,B2) RX_(0x43,R1,X2,B2,D2) # define ICY(R1,D2,X2,B2) RXY_(0xE3,R1,X2,B2,D2,0x73) @@ -966,9 +968,14 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t); static void _movi(jit_state_t*,jit_int32_t,jit_word_t); # define movi_p(r0,i0) _movi_p(_jit,r0,i0) static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t); -# define bswapr_us(r0, r1) generic_bswapr_us(_jit, r0, r1) -# define bswapr_ui(r0, r1) generic_bswapr_ui(_jit, r0, r1) -# define bswapr_ul(r0, r1) generic_bswapr_ul(_jit, r0, r1) +# define bswapr_us(r0, r1) _bswapr_us(_jit, r0, r1) +static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t); +# define bswapr_ui(r0, r1) _bswapr_ui(_jit, r0, r1) +static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t); +# if __WORDSIZE == 64 +#define bswapr_ul(r0, r1) _bswapr_ul(_jit, r0, r1) +static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t); +#endif # define movnr(r0,r1,r2) _movnr(_jit,r0,r1,r2) static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); # define movzr(r0,r1,r2) _movzr(_jit,r0,r1,r2) @@ -1051,32 +1058,39 @@ static void _qdivi_u(jit_state_t*,jit_int32_t, # if __WORDSIZE == 32 # define lshr(r0,r1,r2) _lshr(_jit,r0,r1,r2) static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -# else -# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2) -# endif -# define lshi(r0,r1,i0) 
_lshi(_jit,r0,r1,i0) +# define lshi(r0,r1,i0) _lshi(_jit,r0,r1,i0) static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# if __WORDSIZE == 32 # define rshr(r0,r1,r2) _rshr(_jit,r0,r1,r2) static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); -# else -# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2) -# endif -# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0) +# define rshi(r0,r1,i0) _rshi(_jit,r0,r1,i0); static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); -# if __WORDSIZE == 32 # define rshr_u(r0,r1,r2) _rshr_u(_jit,r0,r1,r2) static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); +# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) +static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # else +# define lshr(r0,r1,r2) SLLG(r0,r1,0,r2) +# define lshi(r0,r1,i0) SLLG(r0,r1,i0,0) +# define rshr(r0,r1,r2) SRAG(r0,r1,0,r2) +# define rshi(r0,r1,i0) SRAG(r0,r1,i0,0) # define rshr_u(r0,r1,r2) SRLG(r0,r1,0,r2) +# define rshi_u(r0,r1,i0) SRLG(r0,r1,i0,0) # endif -# define rshi_u(r0,r1,i0) _rshi_u(_jit,r0,r1,i0) -static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t); # if __WORDSIZE == 32 # define negr(r0,r1) LCR(r0,r1) # else # define negr(r0,r1) LCGR(r0,r1) # endif +# define bitswap(r0, r1) _bitswap(_jit, r0, r1) +static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define comr(r0,r1) _comr(_jit,r0,r1) static void _comr(jit_state_t*,jit_int32_t,jit_int32_t); # define andr(r0,r1,r2) _andr(_jit,r0,r1,r2) @@ -1289,13 +1303,13 @@ static void 
_stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t); # define bmci(i0,r0,i1) bmxi(CC_E,i0,r0,i1) # define bmci_p(i0,r0,i1) bmxi_p(CC_E,i0,r0,i1) # define jmpr(r0) BR(r0) -# define jmpi(i0) _jmpi(_jit,i0) -static void _jmpi(jit_state_t*,jit_word_t); +# define jmpi(i0,i1) _jmpi(_jit,i0,i1) +static jit_word_t _jmpi(jit_state_t*,jit_word_t, jit_bool_t); # define jmpi_p(i0) _jmpi_p(_jit,i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) BALR(_R14_REGNO,r0) -# define calli(i0) _calli(_jit,i0) -static void _calli(jit_state_t*,jit_word_t); +# define calli(i0,i1) _calli(_jit,i0,i1) +static jit_word_t _calli(jit_state_t*,jit_word_t, jit_bool_t); # define calli_p(i0) _calli_p(_jit,i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(i0) _prolog(_jit,i0) @@ -2473,6 +2487,31 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) patch_at(w, _jit->pc.w); } +static void +_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LRVR(r0, r1); + SRL(r0, 16, 0); + LLGHR(r0, r0); +} + +static void +_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LRVR(r0, r1); +# if __WORDSIZE == 64 + LLGFR(r0, r0); +# endif +} + +#if __WORDSIZE == 64 +static void +_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + LRVGR(r0, r1); +} +#endif + static void _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t r3, jit_word_t i0) @@ -2897,19 +2936,14 @@ _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) SLL(r0, 0, r2); } } -#endif static void _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; - reg = jit_get_reg_but_zero(0); - movi(rn(reg), i0); - lshr(r0, r1, rn(reg)); - jit_unget_reg_but_zero(reg); + movr(r0, r1); + SLL(r0, i0, 0); } -# if __WORDSIZE == 32 static void _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2926,19 +2960,14 @@ _rshr(jit_state_t *_jit, 
jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) SRA(r0, 0, r2); } } -#endif static void _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; - reg = jit_get_reg_but_zero(0); - movi(rn(reg), i0); - rshr(r0, r1, rn(reg)); - jit_unget_reg_but_zero(reg); + movr(r0, r1); + SRA(r0, i0, 0); } -# if __WORDSIZE == 32 static void _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -2955,16 +2984,141 @@ _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) SRL(r0, 0, r2); } } -#endif static void _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { - jit_int32_t reg; - reg = jit_get_reg_but_zero(0); - movi(rn(reg), i0); - rshr_u(r0, r1, rn(reg)); - jit_unget_reg_but_zero(reg); + movr(r0, r1); + SRL(r0, i0, 0); +} +#endif + +static void +_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1, t2, t3, t4; + movr(r0, r1); + t0 = jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + t2 = jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); + rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); + rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); + rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 
0x00ff00ffL : 0x00ff00ff00ff00ffL); + rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# if __WORDSIZE == 32 + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + lshi(rn(t2), r0, 16); /* t2 = v << 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# else + movi(rn(t0), 0x0000ffff0000ffffL); + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ + lshi(rn(t2), r0, 32); /* t2 = v << 32 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# endif + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +#if CHECK_FLOGR + if (jit_cpu.flogr) { +#endif + comr(r0, r1); + clzr(r0, r0); +#if CHECK_FLOGR + } + else + fallback_clo(r0, r1); +#endif +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +#if CHECK_FLOGR + if (jit_cpu.flogr) { +#endif +#if __WORDSIZE == 32 + jit_word_t w; +#endif + jit_int32_t regno; + regno = jit_get_reg_pair(); +#if __WORDSIZE == 32 + SLLG(rn(regno), r1, 32, 0); +#else + movr(rn(regno), r1); +#endif + FLOGR(rn(regno), rn(regno)); + movr(r0, rn(regno)); +#if __WORDSIZE == 32 + w = blei_p(_jit->pc.w, r0, 31); + rshi(r0, r0, 1); /* r0 is 64 */ + patch_at(w, _jit->pc.w); +#endif + jit_unget_reg_pair(regno); +#if CHECK_FLOGR + } + else + fallback_clz(r0, r1); +#endif +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +#if CHECK_FLOGR + if (jit_cpu.flogr) { +#endif + bitswap(r0, r1); + clor(r0, r0); +#if CHECK_FLOGR + } + else + fallback_cto(r0, r1); +#endif +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ +#if 
CHECK_FLOGR + if (jit_cpu.flogr) { +#endif + bitswap(r0, r1); + clzr(r0, r0); +#if CHECK_FLOGR + } + else + fallback_ctz(r0, r1); +#endif } static void @@ -3497,13 +3651,14 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif -static void -_jmpi(jit_state_t *_jit, jit_word_t i0) +static jit_word_t +_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1) { - jit_word_t d; jit_int32_t reg; - d = (i0 - _jit->pc.w) >> 1; - if (s16_p(d)) + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 1; + if (i1 && s16_p(d)) J(x16(d)); else if (s32_p(d)) BRL(d); @@ -3513,6 +3668,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(reg)); jit_unget_reg_but_zero(reg); } + return (w); } static jit_word_t @@ -3527,13 +3683,16 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0) return (w); } -static void -_calli(jit_state_t *_jit, jit_word_t i0) +static jit_word_t +_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1) { - jit_word_t d; jit_int32_t reg; - d = (i0 - _jit->pc.w) >> 1; - if (s32_p(d)) + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 1; + if (i1 && s16_p(d)) + BRAS(_R14_REGNO, x16(d)); + else if (s32_p(d)) BRASL(_R14_REGNO, d); else { reg = jit_get_reg_but_zero(0); @@ -3541,6 +3700,7 @@ _calli(jit_state_t *_jit, jit_word_t i0) callr(rn(reg)); jit_unget_reg_but_zero(reg); } + return (w); } static jit_word_t @@ -3889,17 +4049,17 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) u.s[7] = i1.s; #endif } - /* BRC */ + /* BRC or BRL */ else if (i0.b.op == 0xA7) { - assert(i0.b.r3 == 0x4); + assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5); d = (label - instr) >> 1; assert(s16_p(d)); i1.b.i2 = d; u.s[1] = i1.s; } - /* BRCL */ + /* BRCL or BRASL */ else if (i0.b.op == 0xC0) { - assert(i0.b.r3 == 0x4); + assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5); d = (label - instr) >> 1; assert(s32_p(d)); i12.i = d; diff --git a/deps/lightning/lib/jit_s390-fpu.c b/deps/lightning/lib/jit_s390-fpu.c index edf9ddd2..6c3c4ac2 100644 --- 
a/deps/lightning/lib/jit_s390-fpu.c +++ b/deps/lightning/lib/jit_s390-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * diff --git a/deps/lightning/lib/jit_s390-sz.c b/deps/lightning/lib/jit_s390-sz.c index e70c65f4..ee304473 100644 --- a/deps/lightning/lib/jit_s390-sz.c +++ b/deps/lightning/lib/jit_s390-sz.c @@ -1,11 +1,11 @@ - #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 94 +#define JIT_INSTR_MAX 164 0, /* data */ 0, /* live */ - 2, /* align */ + 4, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 2, /* label */ @@ -14,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,11 +25,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 40, /* va_start */ - 86, /* va_arg */ - 82, /* va_arg_d */ + 82, /* va_arg */ + 78, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -43,8 +58,8 @@ 14, /* rsbi */ 6, /* mulr */ 14, /* muli */ - 46, /* qmulr */ - 50, /* qmuli */ + 38, /* qmulr */ + 42, /* qmuli */ 10, /* qmulr_u */ 18, /* qmuli_u */ 10, /* divr */ @@ -66,11 +81,11 @@ 4, /* xorr */ 12, /* xori */ 8, /* lshr */ - 10, /* lshi */ + 6, /* lshi */ 8, /* rshr */ - 10, /* rshi */ + 6, /* rshi */ 8, /* rshr_u */ - 10, /* rshi_u */ + 6, /* rshi_u */ 2, /* negr */ 8, /* comr */ 16, /* ltr */ @@ -97,12 +112,17 @@ 8, /* movi */ 14, /* movnr */ 14, /* movzr */ + 22, /* casr */ + 28, /* casi */ 4, /* extr_c */ 4, 
/* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 12, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ 4, /* htonr_us */ 2, /* htonr_ui */ 0, /* htonr_ul */ @@ -191,17 +211,41 @@ 8, /* bxsubr_u */ 12, /* bxsubi_u */ 2, /* jmpr */ - 10, /* jmpi */ + 6, /* jmpi */ 2, /* callr */ - 10, /* calli */ + 6, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -401,20 +445,20 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 38, /* bswapr_us */ - 94, /* bswapr_ui */ - 0, /* bswapr_ul */ - 22, /* casr */ - 28, /* casi */ + 36, /* clo */ + 28, /* clz */ + 164, /* cto */ + 158, /* ctz */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 300 +#define JIT_INSTR_MAX 280 0, /* data */ 0, /* live */ - 6, /* align */ + 20, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 2, /* label */ @@ -423,7 +467,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -431,11 +478,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ 
+ 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 44, /* va_start */ - 104, /* va_arg */ - 100, /* va_arg_d */ + 100, /* va_arg */ + 96, /* va_arg_d */ 0, /* va_end */ 8, /* addr */ 24, /* addi */ @@ -452,8 +511,8 @@ 28, /* rsbi */ 8, /* mulr */ 24, /* muli */ - 60, /* qmulr */ - 68, /* qmuli */ + 52, /* qmulr */ + 60, /* qmuli */ 16, /* qmulr_u */ 32, /* qmuli_u */ 12, /* divr */ @@ -475,11 +534,11 @@ 8, /* xorr */ 24, /* xori */ 6, /* lshr */ - 10, /* lshi */ + 6, /* lshi */ 6, /* rshr */ - 10, /* rshi */ + 6, /* rshi */ 6, /* rshr_u */ - 10, /* rshi_u */ + 6, /* rshi_u */ 4, /* negr */ 12, /* comr */ 20, /* ltr */ @@ -506,19 +565,24 @@ 16, /* movi */ 18, /* movnr */ 18, /* movzr */ + 30, /* casr */ + 42, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 4, /* extr_i */ 4, /* extr_ui */ + 12, /* bswapr_us */ + 8, /* bswapr_ui */ + 4, /* bswapr_ul */ 4, /* htonr_us */ 4, /* htonr_ui */ 4, /* htonr_ul */ 6, /* ldr_c */ 18, /* ldi_c */ 6, /* ldr_uc */ - 18, /* ldi_uc */ + 22, /* ldi_uc */ 6, /* ldr_s */ 18, /* ldi_s */ 6, /* ldr_us */ @@ -544,7 +608,7 @@ 14, /* ldxr_l */ 26, /* ldxi_l */ 4, /* str_c */ - 16, /* sti_c */ + 20, /* sti_c */ 4, /* str_s */ 16, /* sti_s */ 4, /* str_i */ @@ -600,17 +664,41 @@ 10, /* bxsubr_u */ 14, /* bxsubi_u */ 2, /* jmpr */ - 18, /* jmpi */ + 6, /* jmpi */ 2, /* callr */ - 18, /* calli */ + 14, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* 
retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -810,9 +898,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 60, /* bswapr_us */ - 140, /* bswapr_ui */ - 300, /* bswapr_ul */ - 30, /* casr */ - 42, /* casi */ + 24, /* clo */ + 12, /* clz */ + 280, /* cto */ + 272, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_s390.c b/deps/lightning/lib/jit_s390.c index 30ab760c..6934b11f 100644 --- a/deps/lightning/lib/jit_s390.c +++ b/deps/lightning/lib/jit_s390.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -16,6 +16,12 @@ * Authors: * Paulo Cesar Pereira de Andrade */ +#define CHECK_FLOGR 0 + +#if CHECK_FLOGR +#include +#include +#endif #include #include @@ -88,11 +94,15 @@ extern void __clear_cache(void *, void *); #define PROTO 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" +# if CHECK_FLOGR +# include "jit_fallback.c" +# endif #undef PROTO /* * Initialization */ +jit_cpu_t jit_cpu; jit_register_t _rvs[] = { { rc(gpr) | 0x0, "%r0" }, { rc(gpr) | 0x1, "%r1" }, @@ -129,13 +139,48 @@ jit_register_t _rvs[] = { { rc(fpr) | rc(arg) | 0x0, "%f0" }, { _NOREG, "" }, }; +#if CHECK_FLOGR +static sigjmp_buf jit_env; +#endif /* * Implementation */ +#if CHECK_FLOGR +static void +sigill_handler(int signum) +{ + jit_cpu.flogr = 0; + siglongjmp(jit_env, 1); +} +#endif + void jit_get_cpu(void) { +#if CHECK_FLOGR + int r12, r13; + struct sigaction new_action, old_action; + new_action.sa_handler = sigill_handler; + sigemptyset(&new_action.sa_mask); + new_action.sa_flags = 0; + sigaction(SIGILL, NULL, &old_action); + if 
(old_action.sa_handler != SIG_IGN) { + sigaction(SIGILL, &new_action, NULL); + if (!sigsetjmp(jit_env, 1)) { + jit_cpu.flogr = 1; + /* flogr %r12, %r12 */ + __asm__ volatile("lgr %%r12, %0; lgr %%r13, %1;" + "flogr %%r12, %%r12;" + "lgr %1, %%r13; lgr %0, %%r12;" + : "=r" (r12), "=r" (r13)); + sigaction(SIGILL, &old_action, NULL); + } + } +#else + /* By default, assume it is available */ + jit_cpu.flogr = 1; +#endif } void @@ -240,18 +285,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); + jit_code_inc_synth_w(code, u); jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -305,7 +350,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -352,18 +397,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -408,7 +457,7 @@ 
_jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _R2 - v->u.w); @@ -421,7 +470,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _R2 - v->u.w); @@ -434,7 +483,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _R2 - v->u.w); @@ -447,7 +496,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _R2 - v->u.w); @@ -460,7 +509,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { #if __WORDSIZE == 32 @@ -479,7 +528,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _R2 - v->u.w); @@ -492,7 +541,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t 
*_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _R2 - v->u.w); @@ -503,10 +552,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_R2 - v->u.w, u); else @@ -515,11 +564,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_R2 - v->u.w, u); else { @@ -627,10 +676,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_R2 - _jitc->function->call.argi, u); @@ -644,11 +693,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_R2 - _jitc->function->call.argi, u); @@ -890,6 +939,7 @@ _emit_code(jit_state_t *_jit) 
struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1032,6 +1082,9 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 1) & ~1); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1082,6 +1135,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(and,); case_rrw(and,); case_rrr(or,); @@ -1427,14 +1484,21 @@ _emit_code(jit_state_t *_jit) assert(temp->code == jit_code_label || temp->code == jit_code_epilog); if (temp->flag & jit_flag_patch) - jmpi(temp->u.w); + jmpi(temp->u.w, 1); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s32_p(word)) { + offset = s16_p(word); + word = jmpi(_jit->pc.w, offset); + } + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } else - jmpi(node->u.w); + jmpi(node->u.w, 1); break; case jit_code_callr: callr(rn(node->u.w)); @@ -1445,19 +1509,27 @@ _emit_code(jit_state_t *_jit) assert(temp->code == jit_code_label || temp->code == jit_code_epilog); if (temp->flag & jit_flag_patch) - calli(temp->u.w); + calli(temp->u.w, 1); else { - word = calli_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s32_p(word)) { + offset =s16_p(word); + word = calli(_jit->pc.w, offset); + } + else + word = calli_p(_jit->pc.w); patch(word, node); } } else - calli(node->u.w); + calli(node->u.w, 1); break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1478,6 +1550,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff 
and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1504,11 +1586,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1518,10 +1612,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case 
jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1565,6 +1675,9 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_s390-cpu.c" # include "jit_s390-fpu.c" +# if CHECK_FLOGR +# include "jit_fallback.c" +# endif #undef CODE void diff --git a/deps/lightning/lib/jit_size.c b/deps/lightning/lib/jit_size.c index b3e1caea..143a5d9d 100644 --- a/deps/lightning/lib/jit_size.c +++ b/deps/lightning/lib/jit_size.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -28,7 +28,7 @@ */ static jit_int16_t _szs[jit_code_last_code] = { #if GET_JIT_SIZE -# define JIT_INSTR_MAX 512 +# define JIT_INSTR_MAX 1024 #else # if defined(__i386__) || defined(__x86_64__) # include "jit_x86-sz.c" @@ -121,7 +121,15 @@ _jit_get_size(jit_state_t *_jit) break; } # endif - size += _szs[node->code]; + switch (node->code) { + /* The instructions are special because they can be arbitrarily long. */ + case jit_code_align: + case jit_code_skip: + size += node->u.w; + break; + default: + size += _szs[node->code]; + } } # if __riscv && __WORDSIZE == 64 /* Heuristically only 20% of constants are unique. 
*/ @@ -143,7 +151,7 @@ jit_finish_size(void) { #if GET_JIT_SIZE FILE *fp; - jit_word_t offset; + int offset; /* Define a single path */ fp = fopen(JIT_SIZE_PATH, "a"); diff --git a/deps/lightning/lib/jit_sparc-cpu.c b/deps/lightning/lib/jit_sparc-cpu.c index 86eb05e1..f4ce6213 100644 --- a/deps/lightning/lib/jit_sparc-cpu.c +++ b/deps/lightning/lib/jit_sparc-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -120,6 +120,11 @@ static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) static void _f3a(jit_state_t*,jit_int32_t, jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) maybe_unused; +# define f2c1(op,rd,op3,rs1,opf,rs2) _f2c1(_jit,op,rd,op3,rs1,opf,rs2) +static void +_f2c1(jit_state_t*,jit_int32_t, jit_int32_t, + jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t) + maybe_unused; # define LDSB(rs1, rs2, rd) f3r(3, rd, 9, rs1, rs2) # define LDSBI(rs1, imm, rd) f3i(3, rd, 9, rs1, imm) # define LDSH(rs1, rs2, rd) f3r(3, rd, 10, rs1, rs2) @@ -545,6 +550,7 @@ static void _f3a(jit_state_t*,jit_int32_t, # define UNIMP(imm) f2r(0, 0, 0, imm) # define FLUSH(rs1, rs2) f3r(2, 0, 59, rs1, rs2) # define FLUSHI(rs1, im) f3i(2, 0, 59, rs1, imm) +# define LZCNT(rs2, rd) f2c1(2, rd, 54, 0, 23, rs2) # define nop(i0) _nop(_jit, i0) static void _nop(jit_state_t*, jit_int32_t); # define movr(r0, r1) _movr(_jit, r0, r1) @@ -567,6 +573,16 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t, #define casi(r0, i0, r1, r2) casx(r0, _NOREG, r1, r2, i0) # define comr(r0, r1) XNOR(r1, 0, r0) # define negr(r0, r1) NEG(r1, r0) +# define bitswap(r0, r1) _bitswap(_jit, r0, r1) +static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t); +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, 
jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define addr(r0, r1, r2) ADD(r1, r2, r0) # define addi(r0, r1, i0) _addi(_jit, r0, r1, i0) static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); @@ -941,13 +957,13 @@ _bm_w(jit_state_t*,jit_bool_t,jit_word_t,jit_int32_t,jit_word_t); # define jmpr(r0) _jmpr(_jit, r0) static void _jmpr(jit_state_t*,jit_int32_t); # define jmpi(i0) _jmpi(_jit, i0) -static void _jmpi(jit_state_t*,jit_word_t); +static jit_word_t _jmpi(jit_state_t*,jit_word_t); # define jmpi_p(i0) _jmpi_p(_jit, i0) static jit_word_t _jmpi_p(jit_state_t*,jit_word_t); # define callr(r0) _callr(_jit, r0) static void _callr(jit_state_t*,jit_int32_t); # define calli(i0) _calli(_jit, i0) -static void _calli(jit_state_t*,jit_word_t); +static jit_word_t _calli(jit_state_t*,jit_word_t); # define calli_p(i0) _calli_p(_jit, i0) static jit_word_t _calli_p(jit_state_t*,jit_word_t); # define prolog(node) _prolog(_jit, node) @@ -1182,6 +1198,26 @@ _f1(jit_state_t *_jit, jit_int32_t op, jit_int32_t disp30) ii(v.v); } +static void +_f2c1(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd, + jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2) +{ + jit_instr_t v; + assert(!(op & 0xfffffffc)); + assert(!(rd & 0xffffffe0)); + assert(!(res & 0xffffffc0)); + assert(!(rs1 & 0xffffffe0)); + assert(!(opf & 0xfffffe00)); + assert(!(rs2 & 0xfffffe00)); + v.op.b = op; + v.rd.b = rd; + v.op3.b = op3; + v.rs1.b = rs1; + v.opf.b = opf; + v.rs2.b = rs2; + ii(v.v); +} + static void _nop(jit_state_t *_jit, jit_int32_t i0) { @@ -1296,6 +1332,111 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_unget_reg(r1_reg); } +static void +_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_int32_t t0, t1, t2, t3, t4; + movr(r0, r1); + t0 = 
jit_get_reg(jit_class_gpr); + t1 = jit_get_reg(jit_class_gpr); + t2 = jit_get_reg(jit_class_gpr); + movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L); + rshi_u(rn(t1), r0, 1); /* t1 = v >> 1 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 1); /* t2 <<= 1 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L); + rshi_u(rn(t1), r0, 2); /* t1 = v >> 2 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 2); /* t2 <<= 2 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL); + rshi_u(rn(t1), r0, 4); /* t1 = v >> 4 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 4); /* t2 <<= 4 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + movi(rn(t0), __WORDSIZE == 32 ? 
0x00ff00ffL : 0x00ff00ff00ff00ffL); + rshi_u(rn(t1), r0, 8); /* t1 = v >> 8 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 8); /* t2 <<= 8 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# if __WORDSIZE == 32 + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + lshi(rn(t2), r0, 16); /* t2 = v << 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# else + movi(rn(t0), 0x0000ffff0000ffffL); + rshi_u(rn(t1), r0, 16); /* t1 = v >> 16 */ + andr(rn(t1), rn(t1), rn(t0)); /* t1 &= t0 */ + andr(rn(t2), r0, rn(t0)); /* t2 = v & t0*/ + lshi(rn(t2), rn(t2), 16); /* t2 <<= 16 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ + rshi_u(rn(t1), r0, 32); /* t1 = v >> 32 */ + lshi(rn(t2), r0, 32); /* t2 = v << 32 */ + orr(r0, rn(t1), rn(t2)); /* v = t1 | t2 */ +# endif + jit_unget_reg(t2); + jit_unget_reg(t1); + jit_unget_reg(t0); +} + +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.lzcnt) { + comr(r0, r1); + clzr(r0, r0); + } + else + fallback_clo(r0, r1); +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.lzcnt) { +#if __WORDSIZE == 32 + jit_word_t w; + SLLXI(r1, 32, r0); + LZCNT(r0, r0); +#if __WORDSIZE == 32 + w = blei(_jit->pc.w, r0, 31); + rshi(r0, r0, 1); /* r0 is 64 */ + patch_at(w, _jit->pc.w); +#endif +#else + LZCNT(r1, r0); + } + else + fallback_clz(r0, r1); +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.lzcnt) { + bitswap(r0, r1); + clor(r0, r0); + } + else + fallback_cto(r0, r1); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + if (jit_cpu.lzcnt) { + bitswap(r0, r1); + clzr(r0, r0); + } + else + fallback_ctz(r0, r1); +} + static void _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { @@ -2310,7 +2451,7 @@ _bw(jit_state_t *_jit, jit_int32_t cc, # if __WORDSIZE == 32 B(cc, (i0 - w) >> 2); # else - B(cc, (i0 - w) >> 2); + BP(cc, 
(i0 - w) >> 2); # endif NOP(); } @@ -2430,14 +2571,15 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) NOP(); } -static void +static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; jit_int32_t reg; - w = (i0 - _jit->pc.w) >> 2; - if (s22_p(w)) { - BA(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s22_p(d)) { + BA(d); NOP(); } else { @@ -2446,6 +2588,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0) jmpr(rn(reg)); jit_unget_reg(reg); } + return (w); } static jit_word_t @@ -2467,17 +2610,19 @@ _callr(jit_state_t *_jit, jit_int32_t r0) NOP(); } -static void +static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t w; - w = (i0 - _jit->pc.w) >> 2; - if (s30_p(w)) { - CALLI(w); + jit_word_t d, w; + w = _jit->pc.w; + d = (i0 - w) >> 2; + if (s30_p(d)) { + CALLI(d); NOP(); } else - (void)calli_p(i0); + w = calli_p(i0); + return (w); } static jit_word_t @@ -2551,24 +2696,24 @@ _epilog(jit_state_t *_jit, jit_node_t *node) { if (_jitc->function->assume_frame) return; - /* (most) other backends do not save incoming arguments, so, - * only save locals here */ + if (_jitc->function->allocar) + subi(_SP_REGNO, _FP_REGNO, _jitc->function->stack); if (jit_regset_tstbit(&_jitc->function->regset, _L0)) - ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0)); + ldxi(_L0_REGNO, _SP_REGNO, _jitc->function->stack + OFF(0)); if (jit_regset_tstbit(&_jitc->function->regset, _L1)) - ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1)); + ldxi(_L1_REGNO, _SP_REGNO, _jitc->function->stack + OFF(1)); if (jit_regset_tstbit(&_jitc->function->regset, _L2)) - ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2)); + ldxi(_L2_REGNO, _SP_REGNO, _jitc->function->stack + OFF(2)); if (jit_regset_tstbit(&_jitc->function->regset, _L3)) - ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3)); + ldxi(_L3_REGNO, _SP_REGNO, _jitc->function->stack + OFF(3)); if (jit_regset_tstbit(&_jitc->function->regset, _L4)) - ldxi(_L4_REGNO, 
_FP_REGNO, _jitc->function->stack + OFF(4)); + ldxi(_L4_REGNO, _SP_REGNO, _jitc->function->stack + OFF(4)); if (jit_regset_tstbit(&_jitc->function->regset, _L5)) - ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5)); + ldxi(_L5_REGNO, _SP_REGNO, _jitc->function->stack + OFF(5)); if (jit_regset_tstbit(&_jitc->function->regset, _L6)) - ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6)); + ldxi(_L6_REGNO, _SP_REGNO, _jitc->function->stack + OFF(6)); if (jit_regset_tstbit(&_jitc->function->regset, _L7)) - ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7)); + ldxi(_L7_REGNO, _SP_REGNO, _jitc->function->stack + OFF(7)); RESTOREI(0, 0, 0); RETL(); NOP(); @@ -2649,6 +2794,11 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) else abort(); } + else if (i.op.b == 1) { + assert(s30_p((label - instr) >> 2)); + i.disp30.b = (label - instr) >> 2; + u.i[0] = i.v; + } else abort(); } diff --git a/deps/lightning/lib/jit_sparc-fpu.c b/deps/lightning/lib/jit_sparc-fpu.c index 95313477..d0e7e813 100644 --- a/deps/lightning/lib/jit_sparc-fpu.c +++ b/deps/lightning/lib/jit_sparc-fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -498,6 +498,44 @@ _f3f(jit_state_t *_jit, jit_int32_t rd, } # if __WORDSIZE == 64 +/* Handle the special case of using all float registers, as exercised + * in check/carg.c. + * For example: + * putargr_f JIT_F0 $ARG + * where JIT_F0 is %f32 and $ARG is %f31 and if %f30 (the mapping for %f31) + * is live, the jit_get_reg() call might return %f30, but, because it is + * live, will spill/reload it, generating assembly: + * + * std %f30, [ %fp + OFFS ] + * fmovd %f32, %f30 + * fmovs %f30, %f31 + * ldd [ %fp + OFFS ], %f30 + * + * what basically becomes a noop as it restores the old value. 
+ */ +#define get_sng_reg(u) _get_sng_reg(_jit, u) +static jit_int32_t +_get_sng_reg(jit_state_t *_jit, jit_int32_t r0) +{ + jit_int32_t reg, tmp; + /* Attempt to get a nospill register */ + reg = jit_get_reg(CLASS_SNG | jit_class_nospill | jit_class_chk); + if (reg == JIT_NOREG) { + /* Will need to spill, so allow spilling it. */ + reg = jit_get_reg(CLASS_SNG); + /* If the special condition happens, allocate another one. + * This will generate uglier machine code (code for floats + * is already ugly), but will work, but doing a double + * spill/reload; the first one being a noop. */ + if (rn(reg) == r0 - 1) { + tmp = reg; + reg = jit_get_reg(CLASS_SNG); + jit_unget_reg(tmp); + } + } + return (reg); +} + static void _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { @@ -507,7 +545,7 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) if (single_precision_p(r1)) FMOVS(r1, r0); else { - t1 = jit_get_reg(CLASS_SNG); + t1 = get_sng_reg(r0); movr_d(rn(t1), r1); FMOVS(rn(t1), r0); jit_unget_reg(t1); @@ -515,13 +553,13 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } else { if (single_precision_p(r1)) { - t0 = jit_get_reg(CLASS_SNG); + t0 = get_sng_reg(r0); FMOVS(r1, rn(t0)); movr_d(r0, rn(t0)); jit_unget_reg(t0); } else { - t1 = jit_get_reg(CLASS_SNG); + t1 = get_sng_reg(r0); movr_d(rn(t1), r1); FMOVS(rn(t1), rn(t1)); movr_d(r0, rn(t1)); @@ -1491,7 +1529,12 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) assert(_jitc->function->self.call & jit_call_varargs); /* Load argument. */ +#if __WORDSIZE == 64 ldr_d(r0, r1); +#else + ldr_f(r0, r1); + ldxi_f(r0 + 1, r1, 4); +#endif /* Update vararg stack pointer. 
*/ addi(r1, r1, 8); diff --git a/deps/lightning/lib/jit_sparc-sz.c b/deps/lightning/lib/jit_sparc-sz.c index 265769dd..95954d93 100644 --- a/deps/lightning/lib/jit_sparc-sz.c +++ b/deps/lightning/lib/jit_sparc-sz.c @@ -1,10 +1,11 @@ #if __WORDSIZE == 32 -#define JIT_INSTR_MAX 52 +#define JIT_INSTR_MAX 180 0, /* data */ 0, /* live */ 0, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 0, /* label */ @@ -13,7 +14,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -21,11 +25,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ - 8, /* va_arg_d */ + 12, /* va_arg_d */ 0, /* va_end */ 4, /* addr */ 12, /* addi */ @@ -96,12 +112,17 @@ 8, /* movi */ 16, /* movnr */ 16, /* movzr */ + 24, /* casr */ + 32, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 0, /* bswapr_ul */ 8, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -190,17 +211,41 @@ 12, /* bxsubr_u */ 12, /* bxsubi_u */ 8, /* jmpr */ - 16, /* jmpi */ + 8, /* jmpi */ 8, /* callr */ - 16, /* calli */ + 8, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* 
pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -400,20 +445,20 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ - 52, /* bswapr_ui */ - 0, /* bswapr_ul */ - 24, /* casr */ - 32, /* casi */ + 176, /* clo */ + 148, /* clz */ + 180, /* cto */ + 152, /* ctz */ #endif /* __WORDSIZE */ #if __WORDSIZE == 64 -#define JIT_INSTR_MAX 116 +#define JIT_INSTR_MAX 216 0, /* data */ 0, /* live */ - 4, /* align */ + 24, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 4, /* label */ @@ -422,7 +467,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -430,8 +478,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 4, /* va_start */ 8, /* va_arg */ 8, /* va_arg_d */ @@ -505,29 +565,34 @@ 24, /* movi */ 16, /* movnr */ 16, /* movzr */ + 24, /* casr */ + 44, /* casi */ 8, /* extr_c */ 4, /* extr_uc */ 8, /* extr_s */ 8, /* extr_us */ 8, /* extr_i */ 8, /* extr_ui */ + 20, /* bswapr_us */ + 52, /* bswapr_ui */ + 116, /* bswapr_ul */ 8, /* htonr_us */ 8, /* htonr_ui */ 4, /* htonr_ul */ 4, /* ldr_c */ 24, /* ldi_c */ 4, /* ldr_uc */ - 24, /* ldi_uc */ + 28, /* 
ldi_uc */ 4, /* ldr_s */ - 24, /* ldi_s */ + 28, /* ldi_s */ 4, /* ldr_us */ - 24, /* ldi_us */ + 28, /* ldi_us */ 4, /* ldr_i */ - 24, /* ldi_i */ + 28, /* ldi_i */ 4, /* ldr_ui */ - 24, /* ldi_ui */ + 28, /* ldi_ui */ 4, /* ldr_l */ - 24, /* ldi_l */ + 28, /* ldi_l */ 4, /* ldxr_c */ 24, /* ldxi_c */ 4, /* ldxr_uc */ @@ -543,13 +608,13 @@ 4, /* ldxr_l */ 24, /* ldxi_l */ 4, /* str_c */ - 24, /* sti_c */ + 28, /* sti_c */ 4, /* str_s */ - 24, /* sti_s */ + 28, /* sti_s */ 4, /* str_i */ - 24, /* sti_i */ + 28, /* sti_i */ 4, /* str_l */ - 24, /* sti_l */ + 28, /* sti_l */ 4, /* stxr_c */ 24, /* stxi_c */ 4, /* stxr_s */ @@ -599,17 +664,41 @@ 12, /* bxsubr_u */ 12, /* bxsubi_u */ 8, /* jmpr */ - 32, /* jmpi */ + 8, /* jmpi */ 8, /* callr */ - 32, /* calli */ + 40, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -666,14 +755,14 @@ 16, /* truncr_f_l */ 20, /* extr_f */ 12, /* extr_d_f */ - 16, /* movr_f */ + 24, /* movr_f */ 32, /* movi_f */ 8, /* ldr_f */ - 28, /* ldi_f */ + 32, /* ldi_f */ 8, /* ldxr_f */ 28, /* ldxi_f */ 8, /* str_f */ - 28, /* sti_f */ + 32, /* sti_f */ 8, /* stxr_f */ 28, /* stxi_f */ 20, /* bltr_f */ @@ -681,13 +770,13 @@ 20, /* bler_f */ 44, /* blei_f */ 28, /* beqr_f */ - 60, /* beqi_f */ + 52, /* beqi_f */ 20, /* bger_f */ 
44, /* bgei_f */ 20, /* bgtr_f */ 44, /* bgti_f */ 20, /* bner_f */ - 44, /* bnei_f */ + 60, /* bnei_f */ 20, /* bunltr_f */ 44, /* bunlti_f */ 20, /* bunler_f */ @@ -760,11 +849,11 @@ 4, /* movr_d */ 32, /* movi_d */ 4, /* ldr_d */ - 24, /* ldi_d */ + 28, /* ldi_d */ 4, /* ldxr_d */ 24, /* ldxi_d */ 4, /* str_d */ - 24, /* sti_d */ + 28, /* sti_d */ 4, /* stxr_d */ 24, /* stxi_d */ 12, /* bltr_d */ @@ -809,9 +898,8 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 20, /* bswapr_us */ - 52, /* bswapr_ui */ - 116, /* bswapr_ul */ - 24, /* casr */ - 44, /* casi */ + 216, /* clo */ + 188, /* clz */ + 204, /* cto */ + 176, /* ctz */ #endif /* __WORDSIZE */ diff --git a/deps/lightning/lib/jit_sparc.c b/deps/lightning/lib/jit_sparc.c index cd45d236..9e837d8a 100644 --- a/deps/lightning/lib/jit_sparc.c +++ b/deps/lightning/lib/jit_sparc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Free Software Foundation, Inc. + * Copyright (C) 2013-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -17,6 +17,16 @@ * Paulo Cesar Pereira de Andrade */ +/* Handling SIGILL should not be done by Lightning, but can either use + * sample, or use another approach to set jit_cpu.lzcnt + */ +#define CHECK_LZCNT 0 + +#if CHECK_LZCNT +#include +#include +#endif + #define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) #if __WORDSIZE == 32 # define jit_arg_d_reg_p(i) ((i) >= 0 && (i) < 5) @@ -40,11 +50,13 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define PROTO 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" +# include "jit_fallback.c" #undef PROTO /* * Initialization */ +jit_cpu_t jit_cpu; jit_register_t _rvs[] = { { 0x00, "%g0" }, { 0x01, "%g1" }, @@ -147,13 +159,45 @@ jit_register_t _rvs[] = { # endif { _NOREG, "" }, }; +#if CHECK_LZCNT +sigjmp_buf jit_env; +#endif /* * Implementation */ +#if CHECK_LZCNT +static void +sigill_handler(int signum) +{ + jit_cpu.lzcnt = 0; + siglongjmp(jit_env, 1); +} +#endif + void jit_get_cpu(void) { +#if CHECK_LZCNT + int g2; + struct sigaction new_action, old_action; + new_action.sa_handler = sigill_handler; + sigemptyset(&new_action.sa_mask); + new_action.sa_flags = 0; + sigaction(SIGILL, NULL, &old_action); + if (old_action.sa_handler != SIG_IGN) { + sigaction(SIGILL, &new_action, NULL); + if (!sigsetjmp(jit_env, 1)) { + jit_cpu.lzcnt = 1; + /* lzcnt %g2, %g2 */ + __asm__ volatile("mov %%g2, %0; .long 0xa3b0021; mov %0, %%g2" + : "=r" (g2)); + sigaction(SIGILL, &old_action, NULL); + } + } +#else + jit_cpu.lzcnt = 0; +#endif } void @@ -184,7 +228,7 @@ _jit_prolog(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; _jitc->function->self.size = stack_framesize; _jitc->function->self.argi = _jitc->function->self.argf = - _jitc->function->self.aoff = _jitc->function->self.alen = 0; + _jitc->function->self.alen = 0; /* float conversion */ # if __WORDSIZE == 32 _jitc->function->self.aoff = -8; @@ -265,20 +309,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, 
jit_int32_t u) +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(retr, u); - if (JIT_RET != u) - jit_movr(JIT_RET, u); - jit_live(JIT_RET); + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -339,12 +381,13 @@ jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { # if __WORDSIZE == 32 - if (u->code == jit_code_arg || u->code == jit_code_arg_f) + if ((u->code >= jit_code_arg_c && u->code <= jit_code_arg) || + u->code == jit_code_arg_f) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_d); return (jit_arg_d_reg_p(u->u.w)); # else - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f); return (jit_arg_d_reg_p(u->u.w)); @@ -379,11 +422,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); + assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif if (jit_arg_reg_p(_jitc->function->self.argi)) offset = _jitc->function->self.argi++; else { @@ -394,7 +441,7 @@ _jit_arg(jit_state_t *_jit) offset = BIAS(_jitc->function->self.size); _jitc->function->self.size += sizeof(jit_word_t); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -471,7 +518,7 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + 
assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, _I0 + v->u.w); @@ -484,7 +531,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_uc(u, _I0 + v->u.w); @@ -497,7 +544,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, _I0 + v->u.w); @@ -510,7 +557,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, _I0 + v->u.w); @@ -523,7 +570,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) { # if __WORDSIZE == 64 @@ -542,7 +589,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, _I0 + v->u.w); @@ -555,7 +602,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); 
jit_inc_synth_wp(getarg_i, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, _I0 + v->u.w); @@ -566,10 +613,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) # endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(_I0 + v->u.w, u); else @@ -578,11 +625,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); if (jit_arg_reg_p(v->u.w)) jit_movi(_I0 + v->u.w, u); else { @@ -795,9 +842,9 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { - jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movr(_O0 + _jitc->function->call.argi, u); @@ -816,10 +863,10 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); if (jit_arg_reg_p(_jitc->function->call.argi)) { jit_movi(_O0 + _jitc->function->call.argi, u); @@ -1193,6 +1240,7 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1326,6 +1374,9 @@ _emit_code(jit_state_t *_jit) if 
((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; + case jit_code_skip: + nop((node->u.w + 3) & ~3); + break; case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; break; @@ -1506,6 +1557,10 @@ _emit_code(jit_state_t *_jit) break; case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_brr(blt,); case_brw(blt,); case_brr(blt, _u); @@ -1723,7 +1778,12 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s22_p(word >> 2)) + word = jmpi(_jit->pc.w); + else + word = jmpi_p(_jit->pc.w); patch(word, node); } } @@ -1738,9 +1798,17 @@ _emit_code(jit_state_t *_jit) temp = node->u.n; assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = calli_p(temp->u.w); - if (!(temp->flag & jit_flag_patch)) + if (temp->flag & jit_flag_patch) + calli(temp->u.w); + else { + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if (s30_p(word >> 2)) + word = calli(_jit->pc.w); + else + word = calli_p(_jit->pc.w); patch(word, node); + } } else calli(node->u.w); @@ -1749,6 +1817,7 @@ _emit_code(jit_state_t *_jit) _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif @@ -1769,6 +1838,16 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. 
*/ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif @@ -1793,11 +1872,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +#if __WORDSIZE == 64 + case jit_code_arg_l: +#endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -1807,10 +1898,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case 
jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -1882,6 +1989,7 @@ _emit_code(jit_state_t *_jit) #define CODE 1 # include "jit_sparc-cpu.c" # include "jit_sparc-fpu.c" +# include "jit_fallback.c" #undef CODE void diff --git a/deps/lightning/lib/jit_x86-cpu.c b/deps/lightning/lib/jit_x86-cpu.c index 1a473dee..f0e41554 100644 --- a/deps/lightning/lib/jit_x86-cpu.c +++ b/deps/lightning/lib/jit_x86-cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -136,12 +136,6 @@ # else # define il(l) ii(l) # endif -# define patch_abs(instr, label) \ - *(jit_word_t *)(instr - sizeof(jit_word_t)) = label -# define patch_rel(instr, label) \ - *(jit_int32_t *)(instr - 4) = label - instr -# define patch_rel_char(instr, label) \ - *(jit_int8_t *)(instr - 1) = label - instr # define rex(l, w, r, x, b) _rex(_jit, l, w, r, x, b) static void _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t); @@ -186,7 +180,8 @@ static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); #define addci(r0, r1, i0) _addci(_jit, r0, r1, i0) static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t); -# define iaddxr(r0, r1) alur(X86_ADC, r0, r1) +# define iaddxr(r0, r1) _iaddxr(_jit, r0, r1) +static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t); # define addxr(r0, r1, r2) _addxr(_jit, r0, r1, r2) static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); # define iaddxi(r0, i0) alui(X86_ADC, r0, i0) @@ -308,6 +303,14 @@ static void _incr(jit_state_t*, jit_int32_t, jit_int32_t); # define decr(r0, r1) _decr(_jit, r0, r1) static void _decr(jit_state_t*, jit_int32_t, jit_int32_t); # endif +# define clor(r0, r1) _clor(_jit, r0, r1) +static void _clor(jit_state_t*, jit_int32_t, jit_int32_t); +# define clzr(r0, r1) _clzr(_jit, r0, r1) +static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctor(r0, r1) _ctor(_jit, r0, r1) +static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t); +# define ctzr(r0, r1) _ctzr(_jit, r0, r1) +static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t); # define cr(code, r0, r1, r2) _cr(_jit, code, r0, r1, r2) static void _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t); @@ -358,7 +361,13 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t); # define imovi(r0, i0) _imovi(_jit, r0, i0) static void _imovi(jit_state_t*, jit_int32_t, 
jit_word_t); # define movi(r0, i0) _movi(_jit, r0, i0) -static void _movi(jit_state_t*, jit_int32_t, jit_word_t); +static +# if CAN_RIP_ADDRESS +jit_word_t +# else +void +# endif +_movi(jit_state_t*, jit_int32_t, jit_word_t); # define movi_p(r0, i0) _movi_p(_jit, r0, i0) static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t); # define movcr(r0, r1) _movcr(_jit, r0, r1) @@ -547,7 +556,7 @@ static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define jng(i0) jcc(X86_CC_NG, i0) # define jg(i0) jcc(X86_CC_G, i0) # define jnle(i0) jcc(X86_CC_NLE, i0) -static void _jcc(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t); # define jccs(code, i0) _jccs(_jit, code, i0) # define jos(i0) jccs(X86_CC_O, i0) # define jnos(i0) jccs(X86_CC_NO, i0) @@ -579,13 +588,15 @@ static void _jcc(jit_state_t*, jit_int32_t, jit_word_t); # define jngs(i0) jccs(X86_CC_NG, i0) # define jgs(i0) jccs(X86_CC_G, i0) # define jnles(i0) jccs(X86_CC_NLE, i0) -static void _jccs(jit_state_t*, jit_int32_t, jit_word_t); +static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t); # define jcr(code, i0, r0, r1) _jcr(_jit, code, i0, r0, r1) -static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); +static jit_word_t _jcr(jit_state_t*, + jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t); # define jci(code, i0, r0, i1) _jci(_jit, code, i0, r0, i1) -static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); +static jit_word_t _jci(jit_state_t*, + jit_int32_t,jit_word_t,jit_int32_t,jit_word_t); # define jci0(code, i0, r0) _jci0(_jit, code, i0, r0) -static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t); +static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t); # define bltr(i0, r0, r1) _bltr(_jit, i0, r0, r1) static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t); # define blti(i0, r0, i1) _blti(_jit, i0, r0, i1) @@ -687,7 +698,7 @@ 
static jit_word_t _jmpi_p(jit_state_t*, jit_word_t); # define jmpi_p(i0) jmpi(i0) # endif # define jmpsi(i0) _jmpsi(_jit, i0) -static void _jmpsi(jit_state_t*, jit_uint8_t); +static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t); # define prolog(node) _prolog(_jit, node) static void _prolog(jit_state_t*, jit_node_t*); # define epilog(node) _epilog(_jit, node) @@ -698,8 +709,8 @@ static void _vastart(jit_state_t*, jit_int32_t); static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t); # define vaarg_d(r0, r1, i0) _vaarg_d(_jit, r0, r1, i0) static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t); -# define patch_at(node, instr, label) _patch_at(_jit, node, instr, label) -static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t); +# define patch_at(instr, label) _patch_at(_jit, instr, label) +static void _patch_at(jit_state_t*, jit_word_t, jit_word_t); # if !defined(HAVE_FFSL) # if __X32 # define ffsl(i) __builtin_ffs(i) @@ -735,11 +746,16 @@ _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md, { if (ri == _NOREG) { if (rb == _NOREG) { -#if __X32 - mrm(0x00, r7(rd), 0x05); -#else - mrm(0x00, r7(rd), 0x04); - sib(_SCL1, 0x04, 0x05); + /* Use ms == _SCL8 to tell it is a %rip relative displacement */ +#if __X64 + if (ms == _SCL8) +#endif + mrm(0x00, r7(rd), 0x05); +#if __X64 + else { + mrm(0x00, r7(rd), 0x04); + sib(_SCL1, 0x04, 0x05); + } #endif ii(md); } @@ -1036,6 +1052,49 @@ _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) } } +static void +_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + /* FIXME: this is not doing what I did expect for the simple test case: + * mov $0xffffffffffffffff, %rax -- rax = 0xffffffffffffffff (-1) + * mov $0xffffffffffffffff, %r10 -- r10 = 0xffffffffffffffff (-1) + * mov $0x1, %r11d -- r11 = 1 + * xor %rbx, %rbx -- rbx = 0 + * (gdb) p $eflags + * $1 = [ PF ZF IF ] + * add %r11, %rax -- r11 = 0x10000000000000000 (0) + * does not fit in 64 bit ^ + * (gdb) p $eflags + * 
$2 = [ CF PF AF ZF IF ] + * adcx %r10, %rbx -- r10 = 0xffffffffffffffff (-1) + * (gdb) p $eflags + * $3 = [ CF PF AF ZF IF ] + * (gdb) p/x $r10 + * $4 = 0xffffffffffffffff + * but, r10 should be zero, as it is: + * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF) + * FIXME: maybe should only use ADCX in the third operation onward, that + * is, after the first ADC? In either case, the add -1+0+carry should + * have used and consumed the carry? At least this is what is expected + * in Lightning... + */ +#if 0 + /* Significantly longer instruction, but avoid cpu stalls as only + * the carry flag is used in a sequence. */ + if (jit_cpu.adx) { + /* ADCX */ + ic(0x66); + rex(0, WIDE, r1, _NOREG, r0); + ic(0x0f); + ic(0x38); + ic(0xf6); + mrm(0x03, r7(r1), r7(r0)); + } + else +#endif + alur(X86_ADC, r0, r1); +} + static void _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) { @@ -1051,7 +1110,12 @@ static void _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { + if ( +#if 0 + /* Do not mix ADC and ADCX */ + !jit_cpu.adx && +#endif + can_sign_extend_int_p(i0)) { movr(r0, r1); iaddxi(r0, i0); } @@ -1913,6 +1977,88 @@ _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) } #endif +static void +_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + clzr(r0, r0); +} + +static void +_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t w, x; + /* LZCNT */ + if (jit_cpu.abm) + ic(0xf3); + /* else BSR */ + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0xbd); + mrm(0x3, r7(r0), r7(r1)); + if (!jit_cpu.abm) { + /* jump if undefined: r1 == 0 */ + w = jccs(X86_CC_E, _jit->pc.w); + /* count leading zeros */ + rsbi(r0, r0, __WORDSIZE - 1); + /* done */ + x = jmpsi(_jit->pc.w); + /* if r1 == 0 */ + patch_at(w, _jit->pc.w); + movi(r0, __WORDSIZE); + /* not undefined */ + patch_at(x, _jit->pc.w); + } + /* LZCNT has defined behavior for value zero and 
count leading zeros */ +} + +static void +_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + comr(r0, r1); + ctzr(r0, r0); +} + +static void +_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) +{ + jit_word_t w; + jit_int32_t t0; + if (!jit_cpu.abm) { + if (jit_cmov_p()) + t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk); + else + t0 = _NOREG; + if (t0 != _NOREG) + movi(rn(t0), __WORDSIZE); + } + /* TZCNT */ + if (jit_cpu.abm) + ic(0xf3); + /* else BSF */ + rex(0, WIDE, r0, _NOREG, r1); + ic(0x0f); + ic(0xbc); + mrm(0x3, r7(r0), r7(r1)); + if (!jit_cpu.abm) { + /* No conditional move or need spill/reload a temporary */ + if (t0 == _NOREG) { + w = jccs(X86_CC_E, _jit->pc.w); + movi(r0, __WORDSIZE); + patch_at(w, _jit->pc.w); + } + else { + /* CMOVE */ + rex(0, WIDE, r0, _NOREG, rn(t0)); + ic(0x0f); + ic(0x44); + mrm(0x3, r7(r0), r7(rn(t0))); + jit_unget_reg(t0); + } + } + /* TZCNT has defined behavior for value zero */ +} + static void _cr(jit_state_t *_jit, jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) @@ -2162,6 +2308,12 @@ _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) ii(i0); # if !__X64_32 } + else if (can_sign_extend_int_p(i0)) { + rex(0, 1, _NOREG, _NOREG, r0); + ic(0xc7); + ic(0xc0 | r7(r0)); + ii(i0); + } else { rex(0, 1, _NOREG, _NOREG, r0); ic(0xb8 | r7(r0)); @@ -2174,22 +2326,45 @@ _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) #endif } +#if CAN_RIP_ADDRESS +static jit_word_t +#else static void +#endif _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { +#if CAN_RIP_ADDRESS + jit_word_t w, rel; + w = _jit->pc.w; + rel = i0 - (w + 8); + rel = rel < 0 ? 
rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + /* lea rel(%rip), %r0 */ + rex(0, WIDE, r0, _NOREG, _NOREG); + w = _jit->pc.w; + ic(0x8d); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif if (i0) imovi(r0, i0); else ixorr(r0, r0); +#if CAN_RIP_ADDRESS + return (w); +#endif } static jit_word_t _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { + jit_word_t w; rex(0, WIDE, _NOREG, _NOREG, r0); + w = _jit->pc.w; ic(0xb8 | r7(r0)); il(i0); - return (_jit->pc.w); + return (w); } static void @@ -2404,7 +2579,18 @@ static void _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x0f); + ic(0xbe); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x0f); ic(0xbe); @@ -2431,7 +2617,18 @@ static void _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x0f); + ic(0xb6); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x0f); ic(0xb6); @@ -2458,7 +2655,18 @@ static void _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? 
rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x0f); + ic(0xbf); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x0f); ic(0xbf); @@ -2485,7 +2693,18 @@ static void _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x0f); + ic(0xb7); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x0f); ic(0xb7); @@ -2516,7 +2735,17 @@ static void _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x63); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { #if __X64 rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x63); @@ -2547,7 +2776,17 @@ static void _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +# if !__X64_32 + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? 
rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, 0, r0, _NOREG, _NOREG); + ic(0x63); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { rex(0, 0, r0, _NOREG, _NOREG); ic(0x63); rx(r0, i0, _NOREG, _NOREG, _SCL1); @@ -2555,7 +2794,11 @@ _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); +# if __X64_32 + ldr_i(r0, rn(reg)); +# else ldr_ui(r0, rn(reg)); +# endif jit_unget_reg(reg); } } @@ -2573,8 +2816,15 @@ static void _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { - rex(0, 1, r0, _NOREG, _NOREG); + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x8b); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else if (can_sign_extend_int_p(i0)) { + rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x8b); rx(r0, i0, _NOREG, _NOREG, _SCL1); } @@ -2778,7 +3028,11 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0) else { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), i0); +# if __X64_32 + ldxr_i(r0, r1, rn(reg)); +# else ldxr_ui(r0, r1, rn(reg)); +# endif jit_unget_reg(reg); } } @@ -2834,7 +3088,27 @@ static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? 
rel - 16 : rel + 16; + if (can_sign_extend_int_p(rel)) { + if (reg8_p(r0)) { + rex(0, 0, r0, _NOREG, _NOREG); + ic(0x88); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else { + reg = jit_get_reg(jit_class_gpr|jit_class_rg8); + movr(rn(reg), r0); + rex(0, 0, rn(reg), _NOREG, _NOREG); + ic(0x88); + rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + jit_unget_reg(reg); + } + } + else +#endif + if (address_p(i0)) { if (reg8_p(r0)) { rex(0, 0, r0, _NOREG, _NOREG); ic(0x88); @@ -2870,7 +3144,18 @@ static void _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + ic(0x66); + rex(0, 0, r0, _NOREG, _NOREG); + ic(0x89); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { ic(0x66); rex(0, 0, r0, _NOREG, _NOREG); ic(0x89); @@ -2896,7 +3181,17 @@ static void _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (can_sign_extend_int_p(i0)) { +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, 0, r0, _NOREG, _NOREG); + ic(0x89); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif + if (address_p(i0)) { rex(0, 0, r0, _NOREG, _NOREG); ic(0x89); rx(r0, i0, _NOREG, _NOREG, _SCL1); @@ -2922,8 +3217,18 @@ static void _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - _jit->pc.w; + rel = rel < 0 ? 
rel - 8 : rel + 8; + if (can_sign_extend_int_p(rel)) { + rex(0, WIDE, r0, _NOREG, _NOREG); + ic(0x89); + rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8); + } + else +#endif if (can_sign_extend_int_p(i0)) { - rex(0, 1, r0, _NOREG, _NOREG); + rex(0, WIDE, r0, _NOREG, _NOREG); ic(0x89); rx(r0, i0, _NOREG, _NOREG, _SCL1); } @@ -3084,208 +3389,221 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) } #endif -static void +static jit_word_t _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0) { + jit_word_t d; jit_word_t w; + w = _jit->pc.w; + d = i0 - (w + 1); ic(0x70 | code); - w = i0 - (_jit->pc.w + 1); - ic(w); + ic(d); + return (w); } -static void +static jit_word_t _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0) { + jit_word_t d; jit_word_t w; + w = _jit->pc.w; ic(0x0f); + d = i0 - (w + 6); ic(0x80 | code); - w = i0 - (_jit->pc.w + 4); - ii(w); + ii(d); + return (w); } -static void +static jit_word_t _jcr(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { alur(X86_CMP, r0, r1); - jcc(code, i0); + return (jcc(code, i0)); } -static void +static jit_word_t _jci(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { alui(X86_CMP, r0, i1); - jcc(code, i0); + return (jcc(code, i0)); } -static void +static jit_word_t _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0) { testr(r0, r0); - jcc(code, i0); + return (jcc(code, i0)); } static jit_word_t _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_L, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_L, i0, r0, r1)); } static jit_word_t _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_L, i0, r0, i1); - else jci0(X86_CC_S, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_L, i0, r0, i1); + else w = jci0(X86_CC_S, i0, r0); + return (w); } static jit_word_t _bltr_u(jit_state_t *_jit, 
jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_B, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_B, i0, r0, r1)); } static jit_word_t _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_B, i0, r0, i1); - else jci0(X86_CC_B, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_B, i0, r0, i1); + else w = jci0(X86_CC_B, i0, r0); + return (w); } static jit_word_t _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_LE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_LE, i0, r0, r1); + return (w); } static jit_word_t _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_LE, i0, r0, i1); - else jci0(X86_CC_LE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_LE, i0, r0, i1); + else w = jci0(X86_CC_LE, i0, r0); + return (w); } static jit_word_t _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_BE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_BE, i0, r0, r1); + return (w); } static jit_word_t _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_BE, i0, r0, i1); - else jci0(X86_CC_BE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_BE, i0, r0, i1); + else w = jci0(X86_CC_BE, i0, r0); + return (w); } static jit_word_t _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_E, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_E, i0, r0, r1); + return (w); } static jit_word_t _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_E, i0, r0, i1); - 
else jci0(X86_CC_E, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_E, i0, r0, i1); + else w = jci0(X86_CC_E, i0, r0); + return (w); } static jit_word_t _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_GE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_GE, i0, r0, r1); + return (w); } static jit_word_t _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_GE, i0, r0, i1); - else jci0(X86_CC_NS, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_GE, i0, r0, i1); + else w = jci0(X86_CC_NS, i0, r0); + return (w); } static jit_word_t _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - if (r0 == r1) jmpi(i0); - else jcr (X86_CC_AE, i0, r0, r1); - return (_jit->pc.w); + jit_word_t w; + if (r0 == r1) w = jmpi(i0); + else w = jcr (X86_CC_AE, i0, r0, r1); + return (w); } static jit_word_t _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_AE, i0, r0, i1); - else jmpi(i0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_AE, i0, r0, i1); + else w = jmpi(i0); + return (w); } static jit_word_t _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_G, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_G, i0, r0, r1)); } static jit_word_t _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - jci(X86_CC_G, i0, r0, i1); - return (_jit->pc.w); + return (jci(X86_CC_G, i0, r0, i1)); } static jit_word_t _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_A, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_A, i0, r0, r1)); } static jit_word_t _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_A, i0, r0, i1); - else jci0(X86_CC_NE, i0, 
r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_A, i0, r0, i1); + else w = jci0(X86_CC_NE, i0, r0); + return (w); } static jit_word_t _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { - jcr(X86_CC_NE, i0, r0, r1); - return (_jit->pc.w); + return (jcr(X86_CC_NE, i0, r0, r1)); } static jit_word_t _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) { - if (i1) jci (X86_CC_NE, i0, r0, i1); - else jci0(X86_CC_NE, i0, r0); - return (_jit->pc.w); + jit_word_t w; + if (i1) w = jci (X86_CC_NE, i0, r0, i1); + else w = jci0(X86_CC_NE, i0, r0); + return (w); } static jit_word_t _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { testr(r0, r1); - jnz(i0); - return (_jit->pc.w); + return (jnz(i0)); } static jit_word_t @@ -3300,16 +3618,14 @@ _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) testr(r0, rn(reg)); jit_unget_reg(reg); } - jnz(i0); - return (_jit->pc.w); + return (jnz(i0)); } static jit_word_t _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { testr(r0, r1); - jz(i0); - return (_jit->pc.w); + return (jz(i0)); } static jit_word_t @@ -3324,16 +3640,14 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) testr(r0, rn(reg)); jit_unget_reg(reg); } - jz(i0); - return (_jit->pc.w); + return (jz(i0)); } static jit_word_t _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } static jit_word_t @@ -3342,8 +3656,7 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3355,8 +3668,7 @@ static jit_word_t _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jc(i0); - return (_jit->pc.w); + 
return (jc(i0)); } static jit_word_t @@ -3365,8 +3677,7 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3378,8 +3689,7 @@ static jit_word_t _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } static jit_word_t @@ -3388,8 +3698,7 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3401,8 +3710,7 @@ static jit_word_t _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { iaddr(r0, r1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } static jit_word_t @@ -3411,8 +3719,7 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { iaddi(r0, i1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3424,8 +3731,7 @@ static jit_word_t _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } static jit_word_t @@ -3434,8 +3740,7 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jo(i0); - return (_jit->pc.w); + return (jo(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3447,8 +3752,7 @@ static jit_word_t _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } static 
jit_word_t @@ -3457,8 +3761,7 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jc(i0); - return (_jit->pc.w); + return (jc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3470,8 +3773,7 @@ static jit_word_t _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } static jit_word_t @@ -3480,8 +3782,7 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jno(i0); - return (_jit->pc.w); + return (jno(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3493,8 +3794,7 @@ static jit_word_t _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { isubr(r0, r1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } static jit_word_t @@ -3503,8 +3803,7 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1) jit_int32_t reg; if (can_sign_extend_int_p(i1)) { isubi(r0, i1); - jnc(i0); - return (_jit->pc.w); + return (jnc(i0)); } reg = jit_get_reg(jit_class_gpr|jit_class_nospill); movi(rn(reg), i1); @@ -3523,35 +3822,39 @@ _callr(jit_state_t *_jit, jit_int32_t r0) static jit_word_t _calli(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; jit_word_t w; + jit_word_t d; + jit_word_t l = _jit->pc.w + 5; + d = i0 - l; #if __X64 - w = i0 - (_jit->pc.w + 5); - if ((jit_int32_t)w == w) { + if ( +# if __X64_32 + !((d < 0) ^ (l < 0)) && +# endif + (jit_int32_t)d == d) { #endif + w = _jit->pc.w; ic(0xe8); - w = i0 - (_jit->pc.w + 4); - ii(w); - word = _jit->pc.w; + ii(d); #if __X64 } else - word = calli_p(i0); + w = calli_p(i0); #endif - return (word); + return (w); } #if __X64 static jit_word_t _calli_p(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; + jit_word_t w; jit_int32_t reg; reg = 
jit_get_reg(jit_class_gpr); - word = movi_p(rn(reg), i0); + w = movi_p(rn(reg), i0); callr(rn(reg)); jit_unget_reg(reg); - return (word); + return (w); } #endif @@ -3566,51 +3869,58 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0) static jit_word_t _jmpi(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; jit_word_t w; + jit_word_t d; + jit_word_t l = _jit->pc.w + 5; + d = i0 - l; #if __X64 - w = i0 - (_jit->pc.w + 5); - if ((jit_int32_t)w == w) { + if ( +# if __X64_32 + !((d < 0) ^ (l < 0)) && +# endif + (jit_int32_t)d == d) { #endif + w = _jit->pc.w; ic(0xe9); - w = i0 - (_jit->pc.w + 4); - ii(w); - word = _jit->pc.w; + ii(d); #if __X64 } else - word = jmpi_p(i0); + w = jmpi_p(i0); #endif - return (word); + return (w); } #if __X64 static jit_word_t _jmpi_p(jit_state_t *_jit, jit_word_t i0) { - jit_word_t word; + jit_word_t w; jit_int32_t reg; reg = jit_get_reg(jit_class_gpr|jit_class_nospill); - word = movi_p(rn(reg), i0); + w = movi_p(rn(reg), i0); jmpr(rn(reg)); jit_unget_reg(reg); - return (word); + return (w); } #endif -static void +static jit_word_t _jmpsi(jit_state_t *_jit, jit_uint8_t i0) { + jit_word_t w = _jit->pc.w; ic(0xeb); ic(i0); + return (w); } static void _prolog(jit_state_t *_jit, jit_node_t *node) { - jit_int32_t reg; + jit_int32_t reg, offs; if (_jitc->function->define_frame || _jitc->function->assume_frame) { jit_int32_t frame = -_jitc->function->frame; + jit_check_frame(); assert(_jitc->function->self.aoff >= frame); if (_jitc->function->assume_frame) return; @@ -3623,76 +3933,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node) (_jitc->function->self.alen > 32 ? 
_jitc->function->self.alen : 32) - /* align stack at 16 bytes */ - _jitc->function->self.aoff) + 15) & -16) + - stack_adjust; + _jitc->function->self.aoff) + 15) & -16); #else _jitc->function->stack = (((_jitc->function->self.alen - - _jitc->function->self.aoff) + 15) & -16) + - stack_adjust; + _jitc->function->self.aoff) + 15) & -16); #endif - subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE); + + if (_jitc->function->stack) + _jitc->function->need_stack = 1; + + if (!_jitc->function->need_frame && !_jitc->function->need_stack) { + /* check if any callee save register needs to be saved */ + for (reg = 0; reg < _jitc->reglen; ++reg) + if (jit_regset_tstbit(&_jitc->function->regset, reg) && + (_rvs[reg].spec & jit_class_sav)) { + _jitc->function->need_stack = 1; + break; + } + } + + if (_jitc->function->need_frame || _jitc->function->need_stack) + subi(_RSP_REGNO, _RSP_REGNO, jit_framesize()); /* callee save registers */ -#if __X32 - if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) - stxi(12, _RSP_REGNO, _RDI_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) - stxi( 8, _RSP_REGNO, _RSI_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) - stxi( 4, _RSP_REGNO, _RBX_REGNO); -#else -# if __CYGWIN__ || _WIN32 - if (jit_regset_tstbit(&_jitc->function->regset, _XMM15)) - sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM14)) - sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM13)) - sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM12)) - sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM11)) - sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM10)) - sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM9)) - sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO); - if 
(jit_regset_tstbit(&_jitc->function->regset, _XMM8)) - sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM7)) - sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM6)) - sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R15)) - stxi(56, _RSP_REGNO, _R15_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R14)) - stxi(48, _RSP_REGNO, _R14_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R13)) - stxi(40, _RSP_REGNO, _R13_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R12)) - stxi(32, _RSP_REGNO, _R12_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) - stxi(24, _RSP_REGNO, _RSI_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) - stxi(16, _RSP_REGNO, _RDI_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) - stxi( 8, _RSP_REGNO, _RBX_REGNO); -# else - if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) - stxi(40, _RSP_REGNO, _RBX_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R12)) - stxi(32, _RSP_REGNO, _R12_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R13)) - stxi(24, _RSP_REGNO, _R13_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R14)) - stxi(16, _RSP_REGNO, _R14_REGNO); - if (jit_regset_tstbit(&_jitc->function->regset, _R15)) - stxi( 8, _RSP_REGNO, _R15_REGNO); -# endif + for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + stxi(offs, _RSP_REGNO, rn(iregs[reg])); + offs += REAL_WORDSIZE; + } + } +#if __X64 && (__CYGWIN__ || _WIN32) + for (reg = 0; reg < jit_size(fregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg])); + offs += sizeof(jit_float64_t); + } + } #endif - stxi(0, _RSP_REGNO, _RBP_REGNO); - movr(_RBP_REGNO, _RSP_REGNO); + + if (_jitc->function->need_frame) { + 
stxi(0, _RSP_REGNO, _RBP_REGNO); + movr(_RBP_REGNO, _RSP_REGNO); + } /* alloca */ - subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack); + if (_jitc->function->stack) + subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack); if (_jitc->function->allocar) { reg = jit_get_reg(jit_class_gpr); movi(rn(reg), _jitc->function->self.aoff); @@ -3716,8 +4001,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) /* test %al, %al */ ic(0x84); ic(0xc0); - jes(0); - nofp_code = _jit->pc.w; + nofp_code = jes(0); /* Save fp registers in the save area, if any is a vararg */ /* Note that the full 16 byte xmm is not saved, because @@ -3728,7 +4012,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node) sse_stxi_d(_jitc->function->vaoff + first_fp_offset + reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg)); - patch_rel_char(nofp_code, _jit->pc.w); + patch_at(nofp_code, _jit->pc.w); } } #endif @@ -3737,68 +4021,38 @@ _prolog(jit_state_t *_jit, jit_node_t *node) static void _epilog(jit_state_t *_jit, jit_node_t *node) { + jit_int32_t reg, offs; if (_jitc->function->assume_frame) return; + if (_jitc->function->need_frame) + movr(_RSP_REGNO, _RBP_REGNO); + /* callee save registers */ - movr(_RSP_REGNO, _RBP_REGNO); -#if __X32 - if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) - ldxi(_RDI_REGNO, _RSP_REGNO, 12); - if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) - ldxi(_RSI_REGNO, _RSP_REGNO, 8); - if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) - ldxi(_RBX_REGNO, _RSP_REGNO, 4); -#else -# if __CYGWIN__ || _WIN32 - if (jit_regset_tstbit(&_jitc->function->regset, _XMM15)) - sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM14)) - sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM13)) - sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM12)) - sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM11)) - 
sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM10)) - sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM9)) - sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM8)) - sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM7)) - sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72); - if (jit_regset_tstbit(&_jitc->function->regset, _XMM6)) - sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64); - if (jit_regset_tstbit(&_jitc->function->regset, _R15)) - ldxi(_R15_REGNO, _RSP_REGNO, 56); - if (jit_regset_tstbit(&_jitc->function->regset, _R14)) - ldxi(_R14_REGNO, _RSP_REGNO, 48); - if (jit_regset_tstbit(&_jitc->function->regset, _R13)) - ldxi(_R13_REGNO, _RSP_REGNO, 40); - if (jit_regset_tstbit(&_jitc->function->regset, _R12)) - ldxi(_R12_REGNO, _RSP_REGNO, 32); - if (jit_regset_tstbit(&_jitc->function->regset, _RSI)) - ldxi(_RSI_REGNO, _RSP_REGNO, 24); - if (jit_regset_tstbit(&_jitc->function->regset, _RDI)) - ldxi(_RDI_REGNO, _RSP_REGNO, 16); - if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) - ldxi(_RBX_REGNO, _RSP_REGNO, 8); -# else - if (jit_regset_tstbit(&_jitc->function->regset, _RBX)) - ldxi(_RBX_REGNO, _RSP_REGNO, 40); - if (jit_regset_tstbit(&_jitc->function->regset, _R12)) - ldxi(_R12_REGNO, _RSP_REGNO, 32); - if (jit_regset_tstbit(&_jitc->function->regset, _R13)) - ldxi(_R13_REGNO, _RSP_REGNO, 24); - if (jit_regset_tstbit(&_jitc->function->regset, _R14)) - ldxi(_R14_REGNO, _RSP_REGNO, 16); - if (jit_regset_tstbit(&_jitc->function->regset, _R15)) - ldxi(_R15_REGNO, _RSP_REGNO, 8); -# endif + for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) { + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) { + ldxi(rn(iregs[reg]), _RSP_REGNO, offs); + offs += REAL_WORDSIZE; + } + } +#if __X64 && (__CYGWIN__ || _WIN32) + for (reg = 0; reg < jit_size(fregs); reg++) { + if 
(jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) { + sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs); + offs += sizeof(jit_float64_t); + } + } #endif - ldxi(_RBP_REGNO, _RSP_REGNO, 0); - addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE); + + if (_jitc->function->need_frame) { + ldxi(_RBP_REGNO, _RSP_REGNO, 0); + addi(_RSP_REGNO, _RSP_REGNO, jit_framesize()); + } + /* This condition does not happen as much as expected because + * it is not safe to not create a frame pointer if any function + * is called, even jit functions, as those might call external + * functions. */ + else if (_jitc->function->need_stack) + addi(_RSP_REGNO, _RSP_REGNO, jit_framesize()); ic(0xc3); } @@ -3808,7 +4062,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) { #if __X32 || __CYGWIN__ || _WIN32 assert(_jitc->function->self.call & jit_call_varargs); - addi(r0, _RBP_REGNO, _jitc->function->self.size); + addi(r0, _RBP_REGNO, jit_selfsize()); #else jit_int32_t reg; @@ -3827,7 +4081,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0) stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg)); /* Initialize overflow pointer to the first stack argument. */ - addi(rn(reg), _RBP_REGNO, _jitc->function->self.size); + addi(rn(reg), _RBP_REGNO, jit_selfsize()); stxi(offsetof(jit_va_list_t, over), r0, rn(reg)); /* Initialize register save area pointer. */ @@ -3861,8 +4115,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) /* Jump over if there are no remaining arguments in the save area. */ icmpi(rn(rg0), va_gp_max_offset); - jaes(0); - ge_code = _jit->pc.w; + ge_code = jaes(0); /* Load the save area pointer in the second temporary. */ ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); @@ -3878,11 +4131,10 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(rg1); /* Jump over overflow code. */ - jmpsi(0); - lt_code = _jit->pc.w; + lt_code = jmpsi(0); /* Where to land if argument is in overflow area. 
*/ - patch_rel_char(ge_code, _jit->pc.w); + patch_at(ge_code, _jit->pc.w); /* Load overflow pointer. */ ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); @@ -3895,7 +4147,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); /* Where to land if argument is in save area. */ - patch_rel_char(lt_code, _jit->pc.w); + patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); #endif @@ -3929,8 +4181,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) /* Jump over if there are no remaining arguments in the save area. */ icmpi(rn(rg0), va_fp_max_offset); - jaes(0); - ge_code = _jit->pc.w; + ge_code = jaes(0); /* Load the save area pointer in the second temporary. */ ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save)); @@ -3949,11 +4200,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) jit_unget_reg(rg1); /* Jump over overflow code. */ - jmpsi(0); - lt_code = _jit->pc.w; + lt_code = jmpsi(0); /* Where to land if argument is in overflow area. */ - patch_rel_char(ge_code, _jit->pc.w); + patch_at(ge_code, _jit->pc.w); /* Load overflow pointer. */ ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over)); @@ -3969,27 +4219,57 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87) stxi(offsetof(jit_va_list_t, over), r1, rn(rg0)); /* Where to land if argument is in save area. */ - patch_rel_char(lt_code, _jit->pc.w); + patch_at(lt_code, _jit->pc.w); jit_unget_reg(rg0); #endif } static void -_patch_at(jit_state_t *_jit, jit_node_t *node, - jit_word_t instr, jit_word_t label) +_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label) { - switch (node->code) { -# if __X64 - case jit_code_calli: - case jit_code_jmpi: -# endif - case jit_code_movi: - patch_abs(instr, label); + jit_word_t disp; + jit_uint8_t *code = (jit_uint8_t *)instr; + ++instr; + switch (code[0]) { + /* movi_p */ + case 0xb8 ... 
0xbf: + *(jit_word_t *)instr = label; break; - default: - patch_rel(instr, label); + /* forward pc relative address known to be in range */ +#if CAN_RIP_ADDRESS + /* movi */ + case 0x8d: + ++instr; + goto apply; +#endif + /* jcc */ + case 0x0f: + ++instr; + if (code[1] < 0x80 || code[1] > 0x8f) + goto fail; + /* calli */ + case 0xe8: + /* jmpi */ + case 0xe9: +#if CAN_RIP_ADDRESS + apply: +#endif + disp = label - (instr + 4); + assert((jit_int32_t)disp == disp); + *(jit_int32_t *)instr = disp; + break; + /* jccs */ + case 0x70 ... 0x7f: + /* jmpsi */ + case 0xeb: + disp = label - (instr + 1); + assert((jit_int8_t)disp == disp); + *(jit_int8_t *)instr = disp; break; + default: + fail: + abort(); } } #endif diff --git a/deps/lightning/lib/jit_x86-sse.c b/deps/lightning/lib/jit_x86-sse.c index 4447a52e..c3ac895e 100644 --- a/deps/lightning/lib/jit_x86-sse.c +++ b/deps/lightning/lib/jit_x86-sse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -18,15 +18,6 @@ */ #if PROTO -# if __X32 -# define sse_address_p(i0) 1 -# else -# if __X64_32 -# define sse_address_p(i0) ((jit_word_t)(i0) >= 0) -# else -# define sse_address_p(i0) can_sign_extend_int_p(i0) -# endif -# endif # define _XMM6_REGNO 6 # define _XMM7_REGNO 7 # define _XMM8_REGNO 8 @@ -470,14 +461,14 @@ _sse_b##name##i_##type(jit_state_t *_jit, \ jit_word_t i0, jit_int32_t r0, \ jit_float##size##_t *i1) \ { \ - jit_word_t word; \ + jit_word_t w; \ jit_int32_t reg = jit_get_reg(jit_class_fpr|jit_class_xpr| \ jit_class_nospill); \ assert(jit_sse_reg_p(reg)); \ sse_movi_##type(rn(reg), i1); \ - word = sse_b##name##r_##type(i0, r0, rn(reg)); \ + w = sse_b##name##r_##type(i0, r0, rn(reg)); \ jit_unget_reg(reg); \ - return (word); \ + return (w); \ } # define fopi(name) fpr_opi(name, f, 32) # define fbopi(name) fpr_bopi(name, f, 32) @@ -809,8 +800,17 @@ _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) ldi = !_jitc->no_data; #if __X64 /* if will allocate a register for offset, just use immediate */ - if (ldi && !sse_address_p(i0)) +# if CAN_RIP_ADDRESS + if (ldi) { + jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8)); + ldi = can_sign_extend_int_p(rel); + if (!ldi && address_p(i0)) + ldi = 1; + } +# else + if (ldi && !address_p(i0)) ldi = 0; +# endif #endif if (ldi) sse_ldi_f(r0, (jit_word_t)i0); @@ -840,10 +840,9 @@ _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } ixorr(reg, reg); ucomissr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -866,10 +865,9 @@ _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } imovi(reg, 1); ucomissr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -928,7 +926,13 @@ 
static void _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (sse_address_p(i0)) +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8)); + if (can_sign_extend_int_p(rel)) + movssmr(rel, _NOREG, _NOREG, _SCL8, r0); + else +#endif + if (address_p(i0)) movssmr(i0, _NOREG, _NOREG, _SCL1, r0); else { reg = jit_get_reg(jit_class_gpr); @@ -975,7 +979,13 @@ static void _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (sse_address_p(i0)) +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8)); + if (can_sign_extend_int_p(rel)) + movssrm(r0, rel, _NOREG, _NOREG, _SCL8); + else +#endif + if (address_p(i0)) movssrm(r0, i0, _NOREG, _NOREG, _SCL1); else { reg = jit_get_reg(jit_class_gpr); @@ -1022,8 +1032,7 @@ static jit_word_t _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r1, r0); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } fbopi(lt) @@ -1031,21 +1040,20 @@ static jit_word_t _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r1, r0); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } fbopi(le) static jit_word_t _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; ucomissr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - je(i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + w = je(i0); + patch_at(jp_code, _jit->pc.w); + return (w); } fbopi(eq) @@ -1053,8 +1061,7 @@ static jit_word_t _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } fbopi(ge) @@ -1062,25 +1069,23 @@ static jit_word_t _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } fbopi(gt) static jit_word_t 
_sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; jit_word_t jz_code; ucomissr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + jz_code = jzs(0); + patch_at(jp_code, _jit->pc.w); + w = jmpi(i0); + patch_at(jz_code, _jit->pc.w); + return (w); } fbopi(ne) @@ -1088,47 +1093,49 @@ static jit_word_t _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } fbopi(unlt) static jit_word_t _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomissr(r0, r1); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } fbopi(unle) static jit_word_t _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomissr(r0, r1); - je(i0); + w = je(i0); } - return (_jit->pc.w); + return (w); } fbopi(uneq) static jit_word_t _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomissr(r1, r0); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } fbopi(unge) @@ -1136,8 +1143,7 @@ static jit_word_t _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r1, r0); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } fbopi(ungt) @@ -1145,8 +1151,7 @@ static jit_word_t _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jne(i0); - return (_jit->pc.w); + return (jne(i0)); } fbopi(ltgt) @@ -1154,8 +1159,7 @@ static jit_word_t _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, 
jit_int32_t r1) { ucomissr(r0, r1); - jnp(i0); - return (_jit->pc.w); + return (jnp(i0)); } fbopi(ord) @@ -1163,8 +1167,7 @@ static jit_word_t _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomissr(r0, r1); - jp(i0); - return (_jit->pc.w); + return (jp(i0)); } fbopi(unord) @@ -1185,10 +1188,9 @@ _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } ixorr(reg, reg); ucomisdr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1211,10 +1213,9 @@ _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) } imovi(reg, 1); ucomisdr(r2, r1); - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1294,8 +1295,17 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) ldi = !_jitc->no_data; #if __X64 /* if will allocate a register for offset, just use immediate */ - if (ldi && !sse_address_p(i0)) +# if CAN_RIP_ADDRESS + if (ldi) { + jit_word_t rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8)); + ldi = can_sign_extend_int_p(rel); + if (!ldi && address_p(i0)) + ldi = 1; + } +# else + if (ldi && !address_p(i0)) ldi = 0; +# endif #endif if (ldi) sse_ldi_d(r0, (jit_word_t)i0); @@ -1306,6 +1316,7 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) movdqxr(r0, rn(reg)); jit_unget_reg(reg); #else + CHECK_CVT_OFFSET(); movi(rn(reg), data.ii[0]); stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); movi(rn(reg), data.ii[1]); @@ -1321,7 +1332,13 @@ static void _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0) { jit_int32_t reg; - if (sse_address_p(i0)) +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8)); + if (can_sign_extend_int_p(rel)) + movsdmr(rel, _NOREG, _NOREG, _SCL8, r0); + else +#endif 
+ if (address_p(i0)) movsdmr(i0, _NOREG, _NOREG, _SCL1, r0); else { reg = jit_get_reg(jit_class_gpr); @@ -1368,7 +1385,13 @@ static void _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0) { jit_int32_t reg; - if (sse_address_p(i0)) +#if CAN_RIP_ADDRESS + jit_word_t rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8)); + if (can_sign_extend_int_p(rel)) + movsdrm(r0, rel, _NOREG, _NOREG, _SCL8); + else +#endif + if (address_p(i0)) movsdrm(r0, i0, _NOREG, _NOREG, _SCL1); else { reg = jit_get_reg(jit_class_gpr); @@ -1415,8 +1438,7 @@ static jit_word_t _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r1, r0); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } dbopi(lt) @@ -1424,21 +1446,20 @@ static jit_word_t _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r1, r0); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } dbopi(le) static jit_word_t _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; ucomisdr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - je(i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + w = je(i0); + patch_at(jp_code, _jit->pc.w); + return (w); } dbopi(eq) @@ -1446,8 +1467,7 @@ static jit_word_t _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jae(i0); - return (_jit->pc.w); + return (jae(i0)); } dbopi(ge) @@ -1455,25 +1475,23 @@ static jit_word_t _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - ja(i0); - return (_jit->pc.w); + return (ja(i0)); } dbopi(gt) static jit_word_t _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_word_t jp_code; jit_word_t jz_code; ucomisdr(r0, r1); - jps(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, 
_jit->pc.w); - return (_jit->pc.w); + jp_code = jps(0); + jz_code = jzs(0); + patch_at(jp_code, _jit->pc.w); + w = jmpi(i0); + patch_at(jz_code, _jit->pc.w); + return (w); } dbopi(ne) @@ -1481,47 +1499,49 @@ static jit_word_t _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } dbopi(unlt) static jit_word_t _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomisdr(r0, r1); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } dbopi(unle) static jit_word_t _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomisdr(r0, r1); - je(i0); + w = je(i0); } - return (_jit->pc.w); + return (w); } dbopi(uneq) static jit_word_t _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; if (r0 == r1) - jmpi(i0); + w = jmpi(i0); else { ucomisdr(r1, r0); - jna(i0); + w = jna(i0); } - return (_jit->pc.w); + return (w); } dbopi(unge) @@ -1529,8 +1549,7 @@ static jit_word_t _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r1, r0); - jnae(i0); - return (_jit->pc.w); + return (jnae(i0)); } dbopi(ungt) @@ -1538,8 +1557,7 @@ static jit_word_t _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jne(i0); - return (_jit->pc.w); + return (jne(i0)); } dbopi(ltgt) @@ -1547,8 +1565,7 @@ static jit_word_t _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jnp(i0); - return (_jit->pc.w); + return (jnp(i0)); } dbopi(ord) @@ -1556,8 +1573,7 @@ static jit_word_t _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { ucomisdr(r0, r1); - jp(i0); - return (_jit->pc.w); + return (jp(i0)); } 
dbopi(unord) # undef fopi diff --git a/deps/lightning/lib/jit_x86-sz.c b/deps/lightning/lib/jit_x86-sz.c index eb668b3c..5c4515a7 100644 --- a/deps/lightning/lib/jit_x86-sz.c +++ b/deps/lightning/lib/jit_x86-sz.c @@ -3,9 +3,10 @@ #define JIT_INSTR_MAX 42 0, /* data */ 0, /* live */ - 3, /* align */ + 11, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 3, /* label */ @@ -14,7 +15,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -22,8 +26,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 3, /* va_start */ 5, /* va_arg */ 7, /* va_arg_d */ @@ -36,9 +52,9 @@ 5, /* addxi */ 4, /* subr */ 6, /* subi */ - 6, /* subcr */ + 12, /* subcr */ 6, /* subci */ - 6, /* subxr */ + 12, /* subxr */ 5, /* subxi */ 8, /* rsbi */ 5, /* mulr */ @@ -52,9 +68,9 @@ 22, /* divr_u */ 25, /* divi_u */ 23, /* qdivr */ - 26, /* qdivi */ + 28, /* qdivi */ 24, /* qdivr_u */ - 27, /* qdivi_u */ + 29, /* qdivi_u */ 21, /* remr */ 24, /* remi */ 22, /* remr_u */ @@ -97,12 +113,17 @@ 5, /* movi */ 5, /* movnr */ 5, /* movzr */ + 9, /* casr */ + 13, /* casi */ 11, /* extr_c */ 11, /* extr_uc */ 3, /* extr_s */ 3, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 7, /* bswapr_us */ + 4, /* bswapr_ui */ + 0, /* bswapr_ul */ 7, /* htonr_us */ 4, /* htonr_ui */ 0, /* htonr_ul */ @@ -195,13 +216,37 @@ 2, /* callr */ 5, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* 
pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -350,7 +395,7 @@ 4, /* extr_d */ 4, /* extr_f_d */ 10, /* movr_d */ - 24, /* movi_d */ + 33, /* movi_d */ 4, /* ldr_d */ 8, /* ldi_d */ 5, /* ldxr_d */ @@ -401,21 +446,21 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 7, /* bswapr_us */ - 4, /* bswapr_ui */ - 0, /* bswapr_ul */ - 9, /* casr */ - 13, /* casi */ -#endif + 21, /* clo */ + 17, /* clz */ + 15, /* cto */ + 11, /* ctz */ +#endif /* __X32 */ #if __X64 #if __CYGWIN__ || _WIN32 #define JIT_INSTR_MAX 130 0, /* data */ 0, /* live */ - 6, /* align */ + 27, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 7, /* label */ @@ -424,7 +469,10 @@ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -432,8 +480,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 7, /* va_start */ 7, /* va_arg */ 9, /* va_arg_d */ @@ -507,29 +567,34 @@ 10, /* movi */ 7, /* movnr */ 7, /* movzr */ + 11, /* casr 
*/ + 21, /* casi */ 7, /* extr_c */ 7, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 3, /* extr_i */ 3, /* extr_ui */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ 9, /* htonr_us */ 6, /* htonr_ui */ 6, /* htonr_ul */ 4, /* ldr_c */ - 15, /* ldi_c */ + 14, /* ldi_c */ 4, /* ldr_uc */ - 15, /* ldi_uc */ + 14, /* ldi_uc */ 4, /* ldr_s */ - 15, /* ldi_s */ + 14, /* ldi_s */ 4, /* ldr_us */ - 15, /* ldi_us */ + 14, /* ldi_us */ 3, /* ldr_i */ - 14, /* ldi_i */ + 13, /* ldi_i */ 3, /* ldr_ui */ - 14, /* ldi_ui */ + 13, /* ldi_ui */ 3, /* ldr_l */ - 14, /* ldi_l */ + 13, /* ldi_l */ 5, /* ldxr_c */ 8, /* ldxi_c */ 5, /* ldxr_uc */ @@ -545,13 +610,13 @@ 4, /* ldxr_l */ 7, /* ldxi_l */ 6, /* str_c */ - 17, /* sti_c */ + 16, /* sti_c */ 4, /* str_s */ - 15, /* sti_s */ + 14, /* sti_s */ 3, /* str_i */ - 14, /* sti_i */ + 13, /* sti_i */ 3, /* str_l */ - 14, /* sti_l */ + 13, /* sti_l */ 7, /* stxr_c */ 7, /* stxi_c */ 5, /* stxr_s */ @@ -600,18 +665,42 @@ 10, /* bxsubi */ 9, /* bxsubr_u */ 10, /* bxsubi_u */ - 3, /* jmpr */ + 2, /* jmpr */ 5, /* jmpi */ - 3, /* callr */ - 13, /* calli */ + 2, /* callr */ + 20, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -625,87 +714,87 @@ 0, /* putargr_f */ 0, /* putargi_f */ 10, /* addr_f */ - 21, /* 
addi_f */ + 19, /* addi_f */ 15, /* subr_f */ - 21, /* subi_f */ - 27, /* rsbi_f */ + 19, /* subi_f */ + 26, /* rsbi_f */ 10, /* mulr_f */ - 21, /* muli_f */ + 19, /* muli_f */ 15, /* divr_f */ - 21, /* divi_f */ - 15, /* negr_f */ + 19, /* divi_f */ + 14, /* negr_f */ 15, /* absr_f */ 5, /* sqrtr_f */ 16, /* ltr_f */ - 31, /* lti_f */ + 30, /* lti_f */ 16, /* ler_f */ - 31, /* lei_f */ + 30, /* lei_f */ 18, /* eqr_f */ - 33, /* eqi_f */ + 32, /* eqi_f */ 16, /* ger_f */ - 31, /* gei_f */ + 30, /* gei_f */ 16, /* gtr_f */ - 31, /* gti_f */ + 30, /* gti_f */ 20, /* ner_f */ - 35, /* nei_f */ + 34, /* nei_f */ 16, /* unltr_f */ - 31, /* unlti_f */ + 30, /* unlti_f */ 16, /* unler_f */ - 31, /* unlei_f */ + 30, /* unlei_f */ 16, /* uneqr_f */ - 31, /* uneqi_f */ + 30, /* uneqi_f */ 16, /* unger_f */ - 31, /* ungei_f */ + 30, /* ungei_f */ 16, /* ungtr_f */ - 31, /* ungti_f */ + 30, /* ungti_f */ 16, /* ltgtr_f */ - 31, /* ltgti_f */ + 30, /* ltgti_f */ 16, /* ordr_f */ - 31, /* ordi_f */ + 30, /* ordi_f */ 16, /* unordr_f */ - 31, /* unordi_f */ + 30, /* unordi_f */ 5, /* truncr_f_i */ 5, /* truncr_f_l */ 5, /* extr_f */ 5, /* extr_d_f */ 5, /* movr_f */ - 15, /* movi_f */ + 18, /* movi_f */ 5, /* ldr_f */ - 16, /* ldi_f */ + 15, /* ldi_f */ 6, /* ldxr_f */ 8, /* ldxi_f */ 5, /* str_f */ - 16, /* sti_f */ + 15, /* sti_f */ 6, /* stxr_f */ 9, /* stxi_f */ 10, /* bltr_f */ - 21, /* blti_f */ + 19, /* blti_f */ 10, /* bler_f */ - 24, /* blei_f */ + 23, /* blei_f */ 12, /* beqr_f */ 27, /* beqi_f */ 10, /* bger_f */ - 25, /* bgei_f */ + 24, /* bgei_f */ 10, /* bgtr_f */ - 25, /* bgti_f */ + 24, /* bgti_f */ 13, /* bner_f */ - 28, /* bnei_f */ + 27, /* bnei_f */ 10, /* bunltr_f */ - 25, /* bunlti_f */ + 24, /* bunlti_f */ 10, /* bunler_f */ - 25, /* bunlei_f */ + 24, /* bunlei_f */ 10, /* buneqr_f */ - 25, /* buneqi_f */ + 24, /* buneqi_f */ 10, /* bunger_f */ - 25, /* bungei_f */ + 24, /* bungei_f */ 10, /* bungtr_f */ - 25, /* bungti_f */ + 24, /* bungti_f */ 10, /* 
bltgtr_f */ - 25, /* bltgti_f */ + 24, /* bltgti_f */ 10, /* bordr_f */ - 25, /* bordi_f */ + 24, /* bordi_f */ 10, /* bunordr_f */ - 25, /* bunordi_f */ + 24, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -724,7 +813,7 @@ 25, /* muli_d */ 15, /* divr_d */ 25, /* divi_d */ - 22, /* negr_d */ + 21, /* negr_d */ 16, /* absr_d */ 5, /* sqrtr_d */ 17, /* ltr_d */ @@ -760,13 +849,13 @@ 5, /* extr_d */ 5, /* extr_f_d */ 5, /* movr_d */ - 15, /* movi_d */ + 29, /* movi_d */ 5, /* ldr_d */ - 16, /* ldi_d */ + 15, /* ldi_d */ 6, /* ldxr_d */ 8, /* ldxi_d */ 5, /* str_d */ - 16, /* sti_d */ + 15, /* sti_d */ 6, /* stxr_d */ 9, /* stxi_d */ 11, /* bltr_d */ @@ -811,29 +900,32 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 9, /* bswapr_us */ - 6, /* bswapr_ui */ - 6, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 27, /* clo */ + 21, /* clz */ + 20, /* cto */ + 14, /* ctz */ #else # if __X64_32 -#define JIT_INSTR_MAX 108 +#define JIT_INSTR_MAX 105 0, /* data */ 0, /* live */ - 3, /* align */ + 7, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 3, /* label */ - 108, /* prolog */ + 105, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -841,11 +933,23 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ - 41, /* va_start */ - 45, /* va_arg */ - 54, /* va_arg_d */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ + 33, /* va_start */ + 43, /* va_arg */ + 45, /* va_arg_d */ 0, /* va_end */ 5, /* addr */ 7, /* addi */ @@ -916,12 +1020,17 @@ 6, 
/* movi */ 7, /* movnr */ 7, /* movzr */ + 11, /* casr */ + 16, /* casi */ 7, /* extr_c */ 7, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 0, /* extr_i */ 0, /* extr_ui */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 0, /* bswapr_ul */ 9, /* htonr_us */ 6, /* htonr_ui */ 0, /* htonr_ul */ @@ -961,11 +1070,11 @@ 8, /* sti_i */ 0, /* str_l */ 0, /* sti_l */ - 12, /* stxr_c */ + 11, /* stxr_c */ 7, /* stxi_c */ - 10, /* stxr_s */ + 9, /* stxr_s */ 7, /* stxi_s */ - 9, /* stxr_i */ + 8, /* stxr_i */ 6, /* stxi_i */ 0, /* stxr_l */ 0, /* stxi_l */ @@ -1011,16 +1120,40 @@ 10, /* bxsubi_u */ 2, /* jmpr */ 5, /* jmpi */ - 3, /* callr */ + 2, /* callr */ 9, /* calli */ 0, /* prepare */ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1034,14 +1167,14 @@ 0, /* putargr_f */ 0, /* putargi_f */ 10, /* addr_f */ - 21, /* addi_f */ + 20, /* addi_f */ 15, /* subr_f */ - 21, /* subi_f */ - 26, /* rsbi_f */ + 20, /* subi_f */ + 25, /* rsbi_f */ 10, /* mulr_f */ - 21, /* muli_f */ + 20, /* muli_f */ 15, /* divr_f */ - 21, /* divi_f */ + 20, /* divi_f */ 15, /* negr_f */ 15, /* absr_f */ 5, /* sqrtr_f */ @@ -1081,40 +1214,40 @@ 11, /* movi_f */ 6, /* ldr_f */ 10, /* ldi_f */ - 11, /* ldxr_f */ + 10, /* ldxr_f */ 9, /* ldxi_f */ 6, /* str_f */ 10, /* sti_f */ - 11, /* stxr_f */ + 10, /* stxr_f */ 
9, /* stxi_f */ 10, /* bltr_f */ - 21, /* blti_f */ + 20, /* blti_f */ 10, /* bler_f */ - 21, /* blei_f */ + 20, /* blei_f */ 12, /* beqr_f */ 23, /* beqi_f */ 10, /* bger_f */ - 21, /* bgei_f */ + 20, /* bgei_f */ 10, /* bgtr_f */ - 21, /* bgti_f */ + 20, /* bgti_f */ 13, /* bner_f */ - 24, /* bnei_f */ + 23, /* bnei_f */ 10, /* bunltr_f */ - 21, /* bunlti_f */ + 20, /* bunlti_f */ 10, /* bunler_f */ - 21, /* bunlei_f */ + 20, /* bunlei_f */ 10, /* buneqr_f */ - 21, /* buneqi_f */ + 20, /* buneqi_f */ 10, /* bunger_f */ - 21, /* bungei_f */ + 20, /* bungei_f */ 10, /* bungtr_f */ - 21, /* bungti_f */ + 20, /* bungti_f */ 10, /* bltgtr_f */ - 21, /* bltgti_f */ + 20, /* bltgti_f */ 10, /* bordr_f */ - 21, /* bordi_f */ + 20, /* bordi_f */ 10, /* bunordr_f */ - 21, /* bunordi_f */ + 20, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -1125,14 +1258,14 @@ 0, /* putargr_d */ 0, /* putargi_d */ 10, /* addr_d */ - 33, /* addi_d */ + 29, /* addi_d */ 15, /* subr_d */ - 33, /* subi_d */ - 38, /* rsbi_d */ + 29, /* subi_d */ + 34, /* rsbi_d */ 10, /* mulr_d */ - 33, /* muli_d */ + 29, /* muli_d */ 15, /* divr_d */ - 33, /* divi_d */ + 29, /* divi_d */ 22, /* negr_d */ 16, /* absr_d */ 5, /* sqrtr_d */ @@ -1172,40 +1305,40 @@ 23, /* movi_d */ 6, /* ldr_d */ 10, /* ldi_d */ - 11, /* ldxr_d */ + 10, /* ldxr_d */ 9, /* ldxi_d */ 6, /* str_d */ 10, /* sti_d */ - 11, /* stxr_d */ + 10, /* stxr_d */ 9, /* stxi_d */ 11, /* bltr_d */ - 34, /* blti_d */ + 30, /* blti_d */ 11, /* bler_d */ - 34, /* blei_d */ + 30, /* blei_d */ 13, /* beqr_d */ 36, /* beqi_d */ 11, /* bger_d */ - 34, /* bgei_d */ + 30, /* bgei_d */ 11, /* bgtr_d */ - 34, /* bgti_d */ + 30, /* bgti_d */ 14, /* bner_d */ - 37, /* bnei_d */ + 33, /* bnei_d */ 11, /* bunltr_d */ - 34, /* bunlti_d */ + 30, /* bunlti_d */ 11, /* bunler_d */ - 34, /* bunlei_d */ + 30, /* bunlei_d */ 11, /* buneqr_d */ - 34, /* buneqi_d */ + 30, /* buneqi_d */ 11, /* bunger_d */ - 34, /* bungei_d */ + 30, /* 
bungei_d */ 11, /* bungtr_d */ - 34, /* bungti_d */ + 30, /* bungti_d */ 11, /* bltgtr_d */ - 34, /* bltgti_d */ + 30, /* bltgti_d */ 11, /* bordr_d */ - 34, /* bordi_d */ + 30, /* bordi_d */ 11, /* bunordr_d */ - 34, /* bunordi_d */ + 30, /* bunordi_d */ 0, /* pushargr_d */ 0, /* pushargi_d */ 0, /* retr_d */ @@ -1220,28 +1353,31 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 9, /* bswapr_us */ - 6, /* bswapr_ui */ - 0, /* bswapr_ul */ - 0, /* casr */ - 0, /* casi */ + 11, /* clo */ + 5, /* clz */ + 11, /* cto */ + 5, /* ctz */ +#else -# else -#define JIT_INSTR_MAX 115 +#define JIT_INSTR_MAX 112 0, /* data */ 0, /* live */ - 6, /* align */ + 27, /* align */ 0, /* save */ 0, /* load */ + 4, /* skip */ 0, /* #name */ 0, /* #note */ 7, /* label */ - 115, /* prolog */ + 112, /* prolog */ 0, /* ellipsis */ 0, /* va_push */ 0, /* allocai */ 0, /* allocar */ - 0, /* arg */ + 0, /* arg_c */ + 0, /* arg_s */ + 0, /* arg_i */ + 0, /* arg_l */ 0, /* getarg_c */ 0, /* getarg_uc */ 0, /* getarg_s */ @@ -1249,8 +1385,20 @@ 0, /* getarg_i */ 0, /* getarg_ui */ 0, /* getarg_l */ - 0, /* putargr */ - 0, /* putargi */ + 0, /* putargr_c */ + 0, /* putargi_c */ + 0, /* putargr_uc */ + 0, /* putargi_uc */ + 0, /* putargr_s */ + 0, /* putargi_s */ + 0, /* putargr_us */ + 0, /* putargi_us */ + 0, /* putargr_i */ + 0, /* putargi_i */ + 0, /* putargr_ui */ + 0, /* putargi_ui */ + 0, /* putargr_l */ + 0, /* putargi_l */ 38, /* va_start */ 41, /* va_arg */ 48, /* va_arg_d */ @@ -1324,12 +1472,17 @@ 10, /* movi */ 7, /* movnr */ 7, /* movzr */ + 11, /* casr */ + 16, /* casi */ 4, /* extr_c */ 4, /* extr_uc */ 4, /* extr_s */ 4, /* extr_us */ 3, /* extr_i */ 3, /* extr_ui */ + 9, /* bswapr_us */ + 6, /* bswapr_ui */ + 6, /* bswapr_ul */ 9, /* htonr_us */ 6, /* htonr_ui */ 6, /* htonr_ul */ @@ -1418,17 +1571,41 @@ 9, /* bxsubr_u */ 10, /* bxsubi_u */ 2, /* jmpr */ - 13, /* jmpi */ - 3, /* callr */ - 12, /* calli */ + 5, /* jmpi */ + 2, /* callr */ + 13, /* calli */ 0, /* prepare 
*/ - 0, /* pushargr */ - 0, /* pushargi */ + 0, /* pushargr_c */ + 0, /* pushargi_c */ + 0, /* pushargr_uc */ + 0, /* pushargi_uc */ + 0, /* pushargr_s */ + 0, /* pushargi_s */ + 0, /* pushargr_us */ + 0, /* pushargi_us */ + 0, /* pushargr_i */ + 0, /* pushargi_i */ + 0, /* pushargr_ui */ + 0, /* pushargi_ui */ + 0, /* pushargr_l */ + 0, /* pushargi_l */ 0, /* finishr */ 0, /* finishi */ 0, /* ret */ - 0, /* retr */ - 0, /* reti */ + 0, /* retr_c */ + 0, /* reti_c */ + 0, /* retr_uc */ + 0, /* reti_uc */ + 0, /* retr_s */ + 0, /* reti_s */ + 0, /* retr_us */ + 0, /* reti_us */ + 0, /* retr_i */ + 0, /* reti_i */ + 0, /* retr_ui */ + 0, /* reti_ui */ + 0, /* retr_l */ + 0, /* reti_l */ 0, /* retval_c */ 0, /* retval_uc */ 0, /* retval_s */ @@ -1498,31 +1675,31 @@ 10, /* bltr_f */ 20, /* blti_f */ 10, /* bler_f */ - 25, /* blei_f */ + 22, /* blei_f */ 12, /* beqr_f */ - 27, /* beqi_f */ + 22, /* beqi_f */ 10, /* bger_f */ - 25, /* bgei_f */ + 22, /* bgei_f */ 10, /* bgtr_f */ - 25, /* bgti_f */ + 22, /* bgti_f */ 13, /* bner_f */ - 28, /* bnei_f */ + 25, /* bnei_f */ 10, /* bunltr_f */ - 25, /* bunlti_f */ + 23, /* bunlti_f */ 10, /* bunler_f */ - 25, /* bunlei_f */ + 23, /* bunlei_f */ 10, /* buneqr_f */ - 25, /* buneqi_f */ + 23, /* buneqi_f */ 10, /* bunger_f */ - 25, /* bungei_f */ + 23, /* bungei_f */ 10, /* bungtr_f */ - 25, /* bungti_f */ + 22, /* bungti_f */ 10, /* bltgtr_f */ - 25, /* bltgti_f */ + 22, /* bltgti_f */ 10, /* bordr_f */ - 25, /* bordi_f */ + 22, /* bordi_f */ 10, /* bunordr_f */ - 25, /* bunordi_f */ + 22, /* bunordi_f */ 0, /* pushargr_f */ 0, /* pushargi_f */ 0, /* retr_f */ @@ -1628,11 +1805,10 @@ 0, /* movi_d_ww */ 0, /* movr_d_w */ 0, /* movi_d_w */ - 9, /* bswapr_us */ - 6, /* bswapr_ui */ - 6, /* bswapr_ul */ - 11, /* casr */ - 16, /* casi */ + 11, /* clo */ + 5, /* clz */ + 11, /* cto */ + 5, /* ctz */ #endif /* __CYGWIN__ || _WIN32 */ # endif /* __X64_32 */ #endif /* __X64 */ diff --git a/deps/lightning/lib/jit_x86-x87.c 
b/deps/lightning/lib/jit_x86-x87.c index 227b1a2f..3de0214e 100644 --- a/deps/lightning/lib/jit_x86-x87.c +++ b/deps/lightning/lib/jit_x86-x87.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -408,14 +408,14 @@ _x87_b##name##i_##type(jit_state_t *_jit, \ jit_word_t i0, jit_int32_t r0, \ jit_float##size##_t *i1) \ { \ - jit_word_t word; \ + jit_word_t w; \ jit_int32_t reg = jit_get_reg(jit_class_fpr| \ jit_class_nospill); \ assert(jit_x87_reg_p(reg)); \ x87_movi_##type(rn(reg), i1); \ - word = x87_b##name##r_##type(i0, r0, rn(reg)); \ + w = x87_b##name##r_##type(i0, r0, rn(reg)); \ jit_unget_reg(reg); \ - return (word); \ + return (w); \ } # define fopi(name) fpr_opi(name, f, 32) # define fbopi(name) fpr_bopi(name, f, 32) @@ -662,6 +662,7 @@ _x87_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); #if defined(sun) /* for the sake of passing test cases in x87 mode, otherwise only sse * is supported */ @@ -692,6 +693,7 @@ _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); fldr(r1); fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1); ldxi(r0, _RBP_REGNO, CVT_OFFSET); @@ -701,6 +703,7 @@ _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); stxi(CVT_OFFSET, _RBP_REGNO, r1); # if __X32 fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1); @@ -771,8 +774,7 @@ _x87jcc(jit_state_t *_jit, jit_int32_t code, fldr(r0); fucomipr(r1 + 1); } - jcc(code, i0); - return (_jit->pc.w); + return (jcc(code, i0)); } static jit_word_t @@ -788,8 +790,7 @@ _x87jcc2(jit_state_t *_jit, jit_int32_t code, fldr(f0); fucomipr(f1 + 1); } - 
jcc(code, i0); - return (_jit->pc.w); + return (jcc(code, i0)); } fopi(lt) @@ -847,6 +848,7 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0) fldln2(); else { if (_jitc->no_data) { + CHECK_CVT_OFFSET(); reg = jit_get_reg(jit_class_gpr); movi(rn(reg), data.i); stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg)); @@ -1038,6 +1040,7 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0) fldln2(); else { if (_jitc->no_data) { + CHECK_CVT_OFFSET(); reg = jit_get_reg(jit_class_gpr); #if __X32 || __X64_32 movi(rn(reg), data.ii[0]); @@ -1082,10 +1085,9 @@ _x87_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) fldr(f1); fucomipr(f2 + 1); } - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_E, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1115,10 +1117,9 @@ _x87_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) fldr(f1); fucomipr(f2 + 1); } - jpes(0); - jp_code = _jit->pc.w; + jp_code = jpes(0); cc(X86_CC_NE, reg); - patch_rel_char(jp_code, _jit->pc.w); + patch_at(jp_code, _jit->pc.w); if (!rc) xchgr(r0, reg); } @@ -1283,6 +1284,7 @@ dbopi(le) static jit_word_t _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_int32_t f0, f1; jit_word_t jp_code; if (r1 == _ST0_REGNO) f0 = r1, f1 = r0; @@ -1293,11 +1295,10 @@ _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) fldr(f0); fucomipr(f1 + 1); } - jpes(0); - jp_code = _jit->pc.w; - jcc(X86_CC_E, i0); - patch_rel_char(jp_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jpes(0); + w = jcc(X86_CC_E, i0); + patch_at(jp_code, _jit->pc.w); + return (w); } dbopi(eq) dbopi(ge) @@ -1306,6 +1307,7 @@ dbopi(gt) static jit_word_t _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) { + jit_word_t w; jit_int32_t f0, f1; jit_word_t jp_code; jit_word_t jz_code; @@ -1317,14 +1319,12 @@ 
_x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1) fldr(f0); fucomipr(f1 + 1); } - jpes(0); - jp_code = _jit->pc.w; - jzs(0); - jz_code = _jit->pc.w; - patch_rel_char(jp_code, _jit->pc.w); - jmpi(i0); - patch_rel_char(jz_code, _jit->pc.w); - return (_jit->pc.w); + jp_code = jpes(0); + jz_code = jzs(0); + patch_at(jp_code, _jit->pc.w); + w = jmpi(i0); + patch_at(jz_code, _jit->pc.w); + return (w); } dbopi(ne) dbopi(unlt) diff --git a/deps/lightning/lib/jit_x86.c b/deps/lightning/lib/jit_x86.c index 6472e566..b4094572 100644 --- a/deps/lightning/lib/jit_x86.c +++ b/deps/lightning/lib/jit_x86.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -21,23 +21,39 @@ #include #if __X32 +# define CAN_RIP_ADDRESS 0 +# define address_p(i0) 1 # define jit_arg_reg_p(i) 0 # define jit_arg_f_reg_p(i) 0 -# define stack_framesize 20 -# define stack_adjust 12 -# define CVT_OFFSET -12 +/* callee save + 16 byte align + * align16(%ebp + %rbx + %rsi + %rdi) + (16 - 4) */ +# define stack_framesize 28 # define REAL_WORDSIZE 4 # define va_gp_increment 4 # define va_fp_increment 8 #else +# if _WIN32 || __X64_32 +# define CAN_RIP_ADDRESS 0 +# else +# define CAN_RIP_ADDRESS 1 +# endif +# if __X64_32 +# define address_p(i0) ((jit_word_t)(i0) >= 0) +# else +# define address_p(i0) can_sign_extend_int_p(i0) +# endif # if __CYGWIN__ || _WIN32 # define jit_arg_reg_p(i) ((i) >= 0 && (i) < 4) # define jit_arg_f_reg_p(i) jit_arg_reg_p(i) +/* callee save + 16 byte align + * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */ # define stack_framesize 152 # define va_fp_increment 8 # else # define jit_arg_reg_p(i) ((i) >= 0 && (i) < 6) # define jit_arg_f_reg_p(i) ((i) >= 0 && (i) < 8) +/* callee save + 16 byte align + * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */ # define stack_framesize 56 # define 
first_gp_argument rdi # define first_gp_offset offsetof(jit_va_list_t, rdi) @@ -54,10 +70,18 @@ # define first_fp_from_offset(fp) (((fp) - va_gp_max_offset) / 16) # endif # define va_gp_increment 8 -# define stack_adjust 8 -# define CVT_OFFSET -8 # define REAL_WORDSIZE 8 #endif +#define CVT_OFFSET _jitc->function->cvt_offset + +#define CHECK_CVT_OFFSET() \ + do { \ + if (!_jitc->function->cvt_offset) { \ + _jitc->again = 1; \ + _jitc->function->cvt_offset = \ + jit_allocai(sizeof(jit_float64_t)); \ + } \ + } while (0) /* * Types @@ -99,6 +123,8 @@ typedef struct jit_va_list { /* * Prototypes */ +#define compute_framesize() _compute_framesize(_jit) +static void _compute_framesize(jit_state_t*); #define patch(instr, node) _patch(_jit, instr, node) static void _patch(jit_state_t*,jit_word_t,jit_node_t*); #define sse_from_x87_f(r0, r1) _sse_from_x87_f(_jit, r0, r1) @@ -227,6 +253,22 @@ jit_register_t _rvs[] = { { _NOREG, "" }, }; +static jit_int32_t iregs[] = { +#if __X32 + _RBX, _RSI, _RDI, +#elif (__CYGWIN__ || _WIN32) + _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15, +#else + _R15, _R14, _R13, _R12, _RBX, +#endif +}; + +#if __X64 && (__CYGWIN__ || _WIN32) +static jit_int32_t fregs[] = { + _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15, +}; +#endif + /* * Implementation */ @@ -234,6 +276,45 @@ void jit_get_cpu(void) { union { + /* eax=7 and ecx=0 */ + struct { + jit_uword_t fsgsbase : 1; + jit_uword_t IA32_TSC_ADJUST : 1; + jit_uword_t sgx : 1; + jit_uword_t bmi1 : 1; + jit_uword_t hle : 1; + jit_uword_t avx2 : 1; + jit_uword_t FDP_EXCPTN_ONLY : 1; + jit_uword_t smep : 1; + jit_uword_t bmi2 : 1; + jit_uword_t erms : 1; + jit_uword_t invpcid : 1; + jit_uword_t rtm : 1; + jit_uword_t rdt_m_pqm : 1; + jit_uword_t dep_FPU_CS_DS : 1; + jit_uword_t mpx : 1; + jit_uword_t rdt_a_pqe : 1; + jit_uword_t avx512_f : 1; + jit_uword_t avx512_dq : 1; + jit_uword_t rdseed : 1; + jit_uword_t adx : 1; + jit_uword_t smap : 1; + jit_uword_t avx512_ifma : 1; + 
jit_uword_t __reserved0 : 1; + jit_uword_t clflushopt : 1; + jit_uword_t clwb : 1; + jit_uword_t pt : 1; + jit_uword_t avx512_pf : 1; + jit_uword_t avx512_er : 1; + jit_uword_t avx512_cd : 1; + jit_uword_t sha : 1; + jit_uword_t avx512_bw : 1; + jit_uword_t avx512_vl : 1; + } bits; + jit_uword_t cpuid; + } ebx; + union { + /* eax=0 */ struct { jit_uint32_t sse3 : 1; jit_uint32_t pclmulqdq : 1; @@ -271,6 +352,7 @@ jit_get_cpu(void) jit_uword_t cpuid; } ecx; union { + /* eax=0 */ struct { jit_uint32_t fpu : 1; jit_uint32_t vme : 1; @@ -310,7 +392,7 @@ jit_get_cpu(void) #if __X32 int ac, flags; #endif - jit_uword_t eax, ebx; + jit_uword_t eax; #if __X32 /* adapted from glibc __sysconf */ @@ -339,7 +421,7 @@ jit_get_cpu(void) #else __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" #endif - : "=a" (eax), "=r" (ebx), + : "=a" (eax), "=r" (ebx.cpuid), "=c" (ecx.cpuid), "=d" (edx.cpuid) : "0" (1)); @@ -361,6 +443,15 @@ jit_get_cpu(void) jit_cpu.aes = ecx.bits.aes; jit_cpu.avx = ecx.bits.avx; + /* query %eax = 7 and ecx = 0 function */ +#if __X64 + __asm__ volatile ("cpuid" + : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx) + : "a" (7), "c" (0)); +#endif + jit_cpu.adx = ebx.bits.adx; + + /* query %eax = 0x80000001 function */ #if __X64 # if __X64_32 @@ -368,10 +459,11 @@ jit_get_cpu(void) # else __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1" # endif - : "=a" (eax), "=r" (ebx), + : "=a" (eax), "=r" (ebx.cpuid), "=c" (ecx.cpuid), "=d" (edx.cpuid) : "0" (0x80000001)); - jit_cpu.lahf = ecx.cpuid & 1; + jit_cpu.lahf = !!(ecx.cpuid & 1); + jit_cpu.abm = !!(ecx.cpuid & 32); #endif } @@ -414,11 +506,15 @@ _jit_prolog(jit_state_t *_jit) _jitc->functions.length += 16; } _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++; - _jitc->function->self.size = stack_framesize; + /* One extra stack slot for implicit saved returned address */ + _jitc->function->self.size = stack_framesize + REAL_WORDSIZE; _jitc->function->self.argi = 
_jitc->function->self.argf = _jitc->function->self.aoff = _jitc->function->self.alen = 0; - /* sse/x87 conversion */ - _jitc->function->self.aoff = CVT_OFFSET; + _jitc->function->cvt_offset = 0; +#if __X64 && (__CYGWIN__ || _WIN32) + /* force framepointer */ + jit_check_frame(); +#endif _jitc->function->self.call = jit_call_default; jit_alloc((jit_pointer_t *)&_jitc->function->regoff, _jitc->reglen * sizeof(jit_int32_t)); @@ -444,6 +540,13 @@ jit_int32_t _jit_allocai(jit_state_t *_jit, jit_int32_t length) { assert(_jitc->function); + jit_check_frame(); +#if __X32 + /* Stack is 4 bytes aligned but jit functions keep it 8 bytes aligned. + * Called functions have 16 byte aligned stack. */ + if (!_jitc->function->self.aoff) + _jitc->function->self.aoff = -4; +#endif switch (length) { case 0: case 1: break; case 2: _jitc->function->self.aoff &= -2; break; @@ -500,22 +603,18 @@ _jit_ret(jit_state_t *_jit) } void -_jit_retr(jit_state_t *_jit, jit_int32_t u) -{ - jit_inc_synth_w(retr, u); - /* movr(%ret, %ret) would be optimized out */ - if (JIT_RET != u) - jit_movr(JIT_RET, u); - /* explicitly tell it is live */ - jit_live(JIT_RET); +_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) +{ + jit_code_inc_synth_w(code, u); + jit_movr(JIT_RET, u); jit_ret(); jit_dec_synth(); } void -_jit_reti(jit_state_t *_jit, jit_word_t u) +_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code) { - jit_inc_synth_w(reti, u); + jit_code_inc_synth_w(code, u); jit_movi(JIT_RET, u); jit_ret(); jit_dec_synth(); @@ -575,7 +674,7 @@ _jit_epilog(jit_state_t *_jit) jit_bool_t _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u) { - if (u->code == jit_code_arg) + if (u->code >= jit_code_arg_c && u->code <= jit_code_arg) return (jit_arg_reg_p(u->u.w)); assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d); return (jit_arg_f_reg_p(u->u.w)); @@ -585,6 +684,7 @@ void _jit_ellipsis(jit_state_t *_jit) { jit_inc_synth(ellipsis); + jit_check_frame(); if (_jitc->prepare) { 
jit_link_prepare(); /* Remember that a varargs function call is being constructed. */ @@ -629,12 +729,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u) } jit_node_t * -_jit_arg(jit_state_t *_jit) +_jit_arg(jit_state_t *_jit, jit_code_t code) { jit_node_t *node; jit_int32_t offset; assert(_jitc->function); assert(!(_jitc->function->self.call & jit_call_varargs)); +#if STRONG_TYPE_CHECKING + assert(code >= jit_code_arg_c && code <= jit_code_arg); +#endif #if __X64 if (jit_arg_reg_p(_jitc->function->self.argi)) { offset = _jitc->function->self.argi++; @@ -647,8 +750,9 @@ _jit_arg(jit_state_t *_jit) { offset = _jitc->function->self.size; _jitc->function->self.size += REAL_WORDSIZE; + jit_check_frame(); } - node = jit_new_node_ww(jit_code_arg, offset, + node = jit_new_node_ww(code, offset, ++_jitc->function->self.argn); jit_link_prolog(); return (node); @@ -676,6 +780,7 @@ _jit_arg_f(jit_state_t *_jit) { offset = _jitc->function->self.size; _jitc->function->self.size += REAL_WORDSIZE; + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_f, offset, ++_jitc->function->self.argn); @@ -705,6 +810,7 @@ _jit_arg_d(jit_state_t *_jit) { offset = _jitc->function->self.size; _jitc->function->self.size += sizeof(jit_float64_t); + jit_check_frame(); } node = jit_new_node_ww(jit_code_arg_d, offset, ++_jitc->function->self.argn); @@ -715,63 +821,75 @@ _jit_arg_d(jit_state_t *_jit) void _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_c, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_extr_c(u, JIT_RA0 - v->u.w); else #endif - jit_ldxi_c(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_c(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_c); jit_inc_synth_wp(getarg_uc, u, v); #if __X64 if 
(jit_arg_reg_p(v->u.w)) jit_extr_uc(u, JIT_RA0 - v->u.w); else #endif - jit_ldxi_uc(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_uc(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_s, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_extr_s(u, JIT_RA0 - v->u.w); else #endif - jit_ldxi_s(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_s(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_s); jit_inc_synth_wp(getarg_us, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_extr_us(u, JIT_RA0 - v->u.w); else #endif - jit_ldxi_us(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_us(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_i, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) { @@ -783,7 +901,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) } else #endif - jit_ldxi_i(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_i(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -791,57 +912,66 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) void _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) { - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_i); jit_inc_synth_wp(getarg_ui, u, v); if (jit_arg_reg_p(v->u.w)) jit_extr_ui(u, JIT_RA0 - v->u.w); - else - jit_ldxi_ui(u, _RBP, v->u.w); + else { + jit_node_t *node = jit_ldxi_ui(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } void _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) 
{ - assert(v->code == jit_code_arg); + assert_arg_type(v->code, jit_code_arg_l); jit_inc_synth_wp(getarg_l, u, v); if (jit_arg_reg_p(v->u.w)) jit_movr(u, JIT_RA0 - v->u.w); - else - jit_ldxi_l(u, _RBP, v->u.w); + else { + jit_node_t *node = jit_ldxi_l(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } #endif void -_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) +_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code) { - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargr, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_movr(JIT_RA0 - v->u.w, u); else #endif - jit_stxi(v->u.w, _RBP, u); + { + jit_node_t *node = jit_stxi(v->u.w, _RBP, u); + jit_link_alist(node); + } jit_dec_synth(); } void -_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v) +_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code) { jit_int32_t regno; - assert(v->code == jit_code_arg); - jit_inc_synth_wp(putargi, u, v); + assert_putarg_type(code, v->code); + jit_code_inc_synth_wp(code, u, v); #if __X64 if (jit_arg_reg_p(v->u.w)) jit_movi(JIT_RA0 - v->u.w, u); else #endif { + jit_node_t *node; regno = jit_get_reg(jit_class_gpr); jit_movi(regno, u); - jit_stxi(v->u.w, _RBP, regno); + node = jit_stxi(v->u.w, _RBP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -857,7 +987,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_f(u, _XMM0 - v->u.w); else #endif - jit_ldxi_f(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_f(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -867,11 +1000,14 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) assert(v->code == jit_code_arg_f); jit_inc_synth_wp(putargr_f, u, v); #if __X64 - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_f_reg_p(v->u.w)) jit_movr_f(_XMM0 - v->u.w, u); else #endif - jit_stxi_f(v->u.w, _RBP, u); + { + 
jit_node_t *node = jit_stxi_f(v->u.w, _RBP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -882,14 +1018,16 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v) assert(v->code == jit_code_arg_f); jit_inc_synth_fp(putargi_f, u, v); #if __X64 - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_f_reg_p(v->u.w)) jit_movi_f(_XMM0 - v->u.w, u); else #endif { - regno = jit_get_reg(jit_class_gpr); + jit_node_t *node; + regno = jit_get_reg(jit_class_fpr); jit_movi_f(regno, u); - jit_stxi_f(v->u.w, _RBP, regno); + node = jit_stxi_f(v->u.w, _RBP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); @@ -905,7 +1043,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) jit_movr_d(u, _XMM0 - v->u.w); else #endif - jit_ldxi_d(u, _RBP, v->u.w); + { + jit_node_t *node = jit_ldxi_d(u, _RBP, v->u.w); + jit_link_alist(node); + } jit_dec_synth(); } @@ -915,11 +1056,14 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v) assert(v->code == jit_code_arg_d); jit_inc_synth_wp(putargr_d, u, v); #if __X64 - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_f_reg_p(v->u.w)) jit_movr_d(_XMM0 - v->u.w, u); else #endif - jit_stxi_d(v->u.w, _RBP, u); + { + jit_node_t *node = jit_stxi_d(v->u.w, _RBP, u); + jit_link_alist(node); + } jit_dec_synth(); } @@ -930,24 +1074,26 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v) assert(v->code == jit_code_arg_d); jit_inc_synth_dp(putargi_d, u, v); #if __X64 - if (jit_arg_reg_p(v->u.w)) + if (jit_arg_f_reg_p(v->u.w)) jit_movi_d(_XMM0 - v->u.w, u); else #endif { - regno = jit_get_reg(jit_class_gpr); + jit_node_t *node; + regno = jit_get_reg(jit_class_fpr); jit_movi_d(regno, u); - jit_stxi_d(v->u.w, _RBP, regno); + node = jit_stxi_d(v->u.w, _RBP, regno); + jit_link_alist(node); jit_unget_reg(regno); } jit_dec_synth(); } void -_jit_pushargr(jit_state_t *_jit, jit_int32_t u) +_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code) { assert(_jitc->function); - 
jit_inc_synth_w(pushargr, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if __X64 if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -964,16 +1110,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u) { jit_stxi(_jitc->function->call.size, _RSP, u); _jitc->function->call.size += REAL_WORDSIZE; + jit_check_frame(); } jit_dec_synth(); } void -_jit_pushargi(jit_state_t *_jit, jit_word_t u) +_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code) { jit_int32_t regno; assert(_jitc->function); - jit_inc_synth_w(pushargi, u); + jit_code_inc_synth_w(code, u); jit_link_prepare(); #if __X64 if (jit_arg_reg_p(_jitc->function->call.argi)) { @@ -994,6 +1141,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u) jit_stxi(_jitc->function->call.size, _RSP, regno); _jitc->function->call.size += REAL_WORDSIZE; jit_unget_reg(regno); + jit_check_frame(); } jit_dec_synth(); } @@ -1028,6 +1176,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u) { jit_stxi_f(_jitc->function->call.size, _RSP, u); _jitc->function->call.size += REAL_WORDSIZE; + jit_check_frame(); } jit_dec_synth(); } @@ -1066,6 +1215,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u) jit_stxi_f(_jitc->function->call.size, _RSP, regno); _jitc->function->call.size += REAL_WORDSIZE; jit_unget_reg(regno); + jit_check_frame(); } jit_dec_synth(); } @@ -1100,6 +1250,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u) { jit_stxi_d(_jitc->function->call.size, _RSP, u); _jitc->function->call.size += sizeof(jit_float64_t); + jit_check_frame(); } jit_dec_synth(); } @@ -1138,6 +1289,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u) jit_stxi_d(_jitc->function->call.size, _RSP, regno); _jitc->function->call.size += sizeof(jit_float64_t); jit_unget_reg(regno); + jit_check_frame(); } jit_dec_synth(); } @@ -1171,6 +1323,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) jit_int32_t reg; jit_node_t *call; assert(_jitc->function); + jit_check_frame(); reg = r0; jit_inc_synth_w(finishr, r0); if 
(_jitc->function->self.alen < _jitc->function->call.size) @@ -1203,32 +1356,26 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0) jit_node_t * _jit_finishi(jit_state_t *_jit, jit_pointer_t i0) { -#if __X64 - jit_int32_t reg; -#endif jit_node_t *node; assert(_jitc->function); + jit_check_frame(); jit_inc_synth_w(finishi, (jit_word_t)i0); if (_jitc->function->self.alen < _jitc->function->call.size) _jitc->function->self.alen = _jitc->function->call.size; #if __X64 - /* FIXME preventing %rax allocation is good enough, but for consistency - * it should automatically detect %rax is dead, in case it has run out - * registers, and not save/restore it, what would be wrong if using the - * the return value, otherwise, just a needless noop */ - /* >> prevent %rax from being allocated as the function pointer */ - jit_regset_setbit(&_jitc->regarg, _RAX); - reg = jit_get_reg(jit_class_gpr); - node = jit_movi(reg, (jit_word_t)i0); - jit_finishr(reg); - jit_unget_reg(reg); - /* << prevent %rax from being allocated as the function pointer */ - jit_regset_clrbit(&_jitc->regarg, _RAX); -#else +# if !(__CYGWIN__ || _WIN32) + if (_jitc->function->call.call & jit_call_varargs) { + if (_jitc->function->call.argf) + jit_movi(_RAX, _jitc->function->call.argf); + else + jit_movi(_RAX, 0); + jit_live(_RAX); + } +# endif +#endif node = jit_calli(i0); node->v.w = _jitc->function->call.argi; node->w.w = _jitc->function->call.argf; -#endif _jitc->function->call.argi = _jitc->function->call.argf = _jitc->function->call.size = 0; _jitc->prepare = 0; @@ -1333,6 +1480,7 @@ _emit_code(jit_state_t *_jit) struct { jit_node_t *node; jit_word_t word; + jit_function_t func; #if DEVEL_DISASSEMBLER jit_word_t prevw; #endif @@ -1598,7 +1746,10 @@ _emit_code(jit_state_t *_jit) if ((word = _jit->pc.w & (node->u.w - 1))) nop(node->u.w - word); break; - case jit_code_note: case jit_code_name: + case jit_code_skip: + nop(node->u.w); + break; + case jit_code_note: case jit_code_name: node->u.w = _jit->pc.w; 
break; case jit_code_label: @@ -1654,6 +1805,10 @@ _emit_code(jit_state_t *_jit) case_rrw(rsh, _u); case_rr(neg,); case_rr(com,); + case_rr(clo,); + case_rr(clz,); + case_rr(cto,); + case_rr(ctz,); case_rrr(lt,); case_rrw(lt,); case_rrr(lt, _u); @@ -1695,7 +1850,14 @@ _emit_code(jit_state_t *_jit) else { assert(temp->code == jit_code_label || temp->code == jit_code_epilog); - word = movi_p(rn(node->u.w), node->v.w); +#if CAN_RIP_ADDRESS + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if ((jit_int32_t)word == word) + word = movi(rn(node->u.w), _jit->pc.w); + else +#endif + word = movi_p(rn(node->u.w), node->v.w); patch(word, node); } } @@ -2017,6 +2179,7 @@ _emit_code(jit_state_t *_jit) case_bff(unord, _d); case_bfw(unord, _d, 64); case jit_code_jmpr: + jit_check_frame(); jmpr(rn(node->u.w)); break; case jit_code_jmpi: @@ -2027,14 +2190,24 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) jmpi(temp->u.w); else { - word = jmpi_p(_jit->pc.w); +#if __X64 + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if ((jit_int32_t)word == word) + word = jmpi(_jit->pc.w); + else +#endif + word = jmpi_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); jmpi(node->u.w); + } break; case jit_code_callr: + jit_check_frame(); callr(rn(node->u.w)); break; case jit_code_calli: @@ -2045,22 +2218,34 @@ _emit_code(jit_state_t *_jit) if (temp->flag & jit_flag_patch) calli(temp->u.w); else { - word = calli_p(_jit->pc.w); +#if __X64 + word = _jit->code.length - + (_jit->pc.uc - _jit->code.ptr); + if ((jit_int32_t)word == word) + word = calli(_jit->pc.w); + else +#endif + word = calli_p(_jit->pc.w); patch(word, node); } } - else + else { + jit_check_frame(); calli(node->u.w); + } break; case jit_code_prolog: _jitc->function = _jitc->functions.ptr + node->w.w; undo.node = node; undo.word = _jit->pc.w; + memcpy(&undo.func, _jitc->function, sizeof(undo.func)); #if DEVEL_DISASSEMBLER undo.prevw = prevw; #endif undo.patch_offset = 
_jitc->patches.offset; restart_function: + compute_framesize(); + patch_alist(0); _jitc->again = 0; prolog(node); break; @@ -2076,10 +2261,29 @@ _emit_code(jit_state_t *_jit) temp->flag &= ~jit_flag_patch; node = undo.node; _jit->pc.w = undo.word; + /* undo.func.self.aoff and undo.func.regset should not + * be undone, as they will be further updated, and are + * the reason of the undo. */ + undo.func.self.aoff = _jitc->function->frame + + _jitc->function->self.aoff; + undo.func.need_frame = _jitc->function->need_frame; + jit_regset_set(&undo.func.regset, &_jitc->function->regset); + /* allocar information also does not need to be undone */ + undo.func.aoffoff = _jitc->function->aoffoff; + undo.func.allocar = _jitc->function->allocar; + /* real stack framesize is not in the jit_function_t, + * if it were, would need to not be undone */ + /* cvt_offset must also not be undone */ + undo.func.cvt_offset = _jitc->function->cvt_offset; + /* this will be recomputed but undo anyway to have it + * better self documented.*/ + undo.func.need_stack = _jitc->function->need_stack; + memcpy(_jitc->function, &undo.func, sizeof(undo.func)); #if DEVEL_DISASSEMBLER prevw = undo.prevw; #endif _jitc->patches.offset = undo.patch_offset; + patch_alist(1); goto restart_function; } if (node->link && @@ -2103,11 +2307,23 @@ _emit_code(jit_state_t *_jit) case jit_code_live: case jit_code_ellipsis: case jit_code_va_push: case jit_code_allocai: case jit_code_allocar: - case jit_code_arg: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: +# if __WORDSIZE == 64 + case jit_code_arg_l: +# endif case jit_code_arg_f: case jit_code_arg_d: case jit_code_va_end: case jit_code_ret: - case jit_code_retr: case jit_code_reti: + case jit_code_retr_c: case jit_code_reti_c: + case jit_code_retr_uc: case jit_code_reti_uc: + case jit_code_retr_s: case jit_code_reti_s: + case jit_code_retr_us: case jit_code_reti_us: + case jit_code_retr_i: case jit_code_reti_i: +#if __WORDSIZE == 64 + case 
jit_code_retr_ui: case jit_code_reti_ui: + case jit_code_retr_l: case jit_code_reti_l: +#endif case jit_code_retr_f: case jit_code_reti_f: case jit_code_retr_d: case jit_code_reti_d: case jit_code_getarg_c: case jit_code_getarg_uc: @@ -2117,10 +2333,26 @@ _emit_code(jit_state_t *_jit) case jit_code_getarg_ui: case jit_code_getarg_l: #endif case jit_code_getarg_f: case jit_code_getarg_d: - case jit_code_putargr: case jit_code_putargi: + case jit_code_putargr_c: case jit_code_putargi_c: + case jit_code_putargr_uc: case jit_code_putargi_uc: + case jit_code_putargr_s: case jit_code_putargi_s: + case jit_code_putargr_us: case jit_code_putargi_us: + case jit_code_putargr_i: case jit_code_putargi_i: +#if __WORDSIZE == 64 + case jit_code_putargr_ui: case jit_code_putargi_ui: + case jit_code_putargr_l: case jit_code_putargi_l: +#endif case jit_code_putargr_f: case jit_code_putargi_f: case jit_code_putargr_d: case jit_code_putargi_d: - case jit_code_pushargr: case jit_code_pushargi: + case jit_code_pushargr_c: case jit_code_pushargi_c: + case jit_code_pushargr_uc: case jit_code_pushargi_uc: + case jit_code_pushargr_s: case jit_code_pushargi_s: + case jit_code_pushargr_us: case jit_code_pushargi_us: + case jit_code_pushargr_i: case jit_code_pushargi_i: +#if __WORDSIZE == 64 + case jit_code_pushargr_ui: case jit_code_pushargi_ui: + case jit_code_pushargr_l: case jit_code_pushargi_l: +#endif case jit_code_pushargr_f: case jit_code_pushargi_f: case jit_code_pushargr_d: case jit_code_pushargi_d: case jit_code_retval_c: case jit_code_retval_uc: @@ -2182,7 +2414,7 @@ _emit_code(jit_state_t *_jit) for (offset = 0; offset < _jitc->patches.offset; offset++) { node = _jitc->patches.ptr[offset].node; word = node->code == jit_code_movi ? 
node->v.n->u.w : node->u.n->u.w; - patch_at(node, _jitc->patches.ptr[offset].inst, word); + patch_at(_jitc->patches.ptr[offset].inst, word); } jit_flush(_jit->code.ptr, _jit->pc.uc); @@ -2231,6 +2463,26 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1) sse_stxi_d(i0, rn(r0), rn(r1)); } +static void +_compute_framesize(jit_state_t *_jit) +{ + jit_int32_t reg; + /* Save stack pointer in first slot */ + _jitc->framesize = REAL_WORDSIZE; + for (reg = 0; reg < jit_size(iregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) + _jitc->framesize += REAL_WORDSIZE; + +#if __X64 && (__CYGWIN__ || _WIN32) + for (reg = 0; reg < jit_size(fregs); reg++) + if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) + _jitc->framesize += sizeof(jit_float64_t); +#endif + /* Make sure functions called have a 16 byte aligned stack */ + _jitc->framesize = (_jitc->framesize + 15) & -16; + _jitc->framesize += 16 - REAL_WORDSIZE; +} + static void _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) { @@ -2256,6 +2508,7 @@ _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node) static void _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1); sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET); } @@ -2263,6 +2516,7 @@ _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1); sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); } @@ -2270,6 +2524,7 @@ _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1); x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET); } @@ -2277,6 +2532,7 @@ _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) static void _x87_from_sse_d(jit_state_t 
*_jit, jit_int32_t r0, jit_int32_t r1) { + CHECK_CVT_OFFSET(); sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1); x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET); } diff --git a/deps/lightning/lib/lightning.c b/deps/lightning/lib/lightning.c index 49244b51..b0b0ef72 100644 --- a/deps/lightning/lib/lightning.c +++ b/deps/lightning/lib/lightning.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2022 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. * @@ -227,8 +227,25 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec) for (regno = 0; regno < _jitc->reglen; regno++) { if ((jit_class(_rvs[regno].spec) & spec) == spec && !jit_regset_tstbit(&_jitc->regarg, regno) && - !jit_regset_tstbit(&_jitc->reglive, regno)) + !jit_regset_tstbit(&_jitc->reglive, regno)) { + if (jit_regset_tstbit(&_jitc->regmask, regno)) { + /* search further, attempting to find a truly known + * free register, not just one in unknown state. */ + jit_int32_t regfree; + + for (regfree = regno + 1; + regfree < _jitc->reglen; regfree++) { + if ((jit_class(_rvs[regfree].spec) & spec) == spec && + !jit_regset_tstbit(&_jitc->regarg, regfree) && + !jit_regset_tstbit(&_jitc->reglive, regfree) && + !jit_regset_tstbit(&_jitc->regmask, regfree)) { + regno = regfree; + break; + } + } + } goto regarg; + } } /* search for a register matching spec that is not an argument @@ -874,6 +891,7 @@ jit_new_state(void) jit_regset_new(&_jitc->regsav); jit_regset_new(&_jitc->reglive); jit_regset_new(&_jitc->regmask); + jit_regset_new(&_jitc->explive); jit_init(); @@ -1335,14 +1353,36 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) mask = 0; break; case jit_code_live: case jit_code_va_end: - case jit_code_retr: case jit_code_retr_f: case jit_code_retr_d: - case jit_code_pushargr: case jit_code_pushargr_f: + case jit_code_retr_c: case jit_code_retr_uc: + case jit_code_retr_s: case jit_code_retr_us: + case jit_code_retr_i: case jit_code_retr_ui: + case 
jit_code_retr_l: + case jit_code_retr_f: case jit_code_retr_d: + case jit_code_pushargr_c: + case jit_code_pushargr_uc: + case jit_code_pushargr_s: + case jit_code_pushargr_us: + case jit_code_pushargr_i: + case jit_code_pushargr_ui: + case jit_code_pushargr_l: + case jit_code_pushargr_f: case jit_code_pushargr_d: case jit_code_finishr: /* synthesized will set jit_cc_a0_jmp */ mask = jit_cc_a0_reg; break; - case jit_code_align: case jit_code_reti: case jit_code_pushargi: - case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */ + case jit_code_align: case jit_code_skip: + case jit_code_reti_c: case jit_code_reti_uc: + case jit_code_reti_s: case jit_code_reti_us: + case jit_code_reti_i: case jit_code_reti_ui: + case jit_code_reti_l: + case jit_code_pushargi_c: + case jit_code_pushargi_uc: + case jit_code_pushargi_s: + case jit_code_pushargi_us: + case jit_code_pushargi_i: + case jit_code_pushargi_ui: + case jit_code_pushargi_l: + case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */ mask = jit_cc_a0_int; break; case jit_code_reti_f: case jit_code_pushargi_f: @@ -1354,7 +1394,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_allocai: mask = jit_cc_a0_int|jit_cc_a1_int; break; - case jit_code_arg: case jit_code_arg_f: case jit_code_arg_d: + case jit_code_arg_c: case jit_code_arg_s: + case jit_code_arg_i: case jit_code_arg_l: + case jit_code_arg_f: case jit_code_arg_d: mask = jit_cc_a0_int|jit_cc_a0_arg; break; case jit_code_calli: case jit_code_jmpi: @@ -1378,11 +1420,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_getarg_f: case jit_code_getarg_d: mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg; break; - case jit_code_putargr: case jit_code_putargr_f: - case jit_code_putargr_d: + case jit_code_putargr_c:case jit_code_putargr_uc: + case jit_code_putargr_s:case jit_code_putargr_us: + case jit_code_putargr_i:case jit_code_putargr_ui: + case jit_code_putargr_l: + case jit_code_putargr_f:case jit_code_putargr_d: 
mask = jit_cc_a0_reg|jit_cc_a1_arg; break; - case jit_code_putargi: + case jit_code_putargi_c:case jit_code_putargi_uc: + case jit_code_putargi_s:case jit_code_putargi_us: + case jit_code_putargi_i:case jit_code_putargi_ui: + case jit_code_putargi_l: mask = jit_cc_a0_int|jit_cc_a1_arg; break; case jit_code_putargi_f: @@ -1422,6 +1470,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code) case jit_code_negr_d: case jit_code_absr_d: case jit_code_sqrtr_d: case jit_code_movr_d: case jit_code_extr_d: case jit_code_extr_f_d: case jit_code_ldr_d: + case jit_code_clor: case jit_code_clzr: + case jit_code_ctor: case jit_code_ctzr: case jit_code_movr_w_f: case jit_code_movr_f_w: case jit_code_movr_w_d: case jit_code_movr_d_w: case jit_code_va_arg: case jit_code_va_arg_d: @@ -1648,8 +1698,14 @@ _do_setup(jit_state_t *_jit) * at the start of a basic block */ for (offset = 0; offset < _jitc->blocks.offset; offset++) { block = _jitc->blocks.ptr + offset; - if (!block->label || block->label->code == jit_code_epilog) + if (!block->label) continue; + if (block->label->code == jit_code_epilog) { + jit_regset_setbit(&block->reglive, JIT_RET); + jit_regset_setbit(&block->reglive, JIT_FRET); + jit_regset_com(&block->regmask, &block->reglive); + continue; + } jit_setup(block); } } @@ -1750,7 +1806,7 @@ _check_block_again(jit_state_t *_jit) } while (todo); - return (1); + return (todo); } static void @@ -1781,6 +1837,7 @@ _jit_optimize(jit_state_t *_jit) jit_node_t *node; jit_block_t *block; jit_word_t offset; + jit_regset_t regmask; todo = 0; _jitc->function = NULL; @@ -1795,15 +1852,31 @@ _jit_optimize(jit_state_t *_jit) if (simplify()) todo = 1; - /* Figure out labels that are only reached with a jump - * and is required to do a simple redundant_store removal - * on jit_beqi below */ + jit_regset_set_ui(&regmask, 0); + for (offset = 0; offset < _jitc->reglen; offset++) { + if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) && + (jit_class(_rvs[offset].spec) & 
jit_class_sav) == jit_class_sav) + jit_regset_setbit(&regmask, offset); + } + + /* Figure out labels that are only reached with a jump */ jump = 1; for (node = _jitc->head; node; node = node->next) { switch (node->code) { case jit_code_label: - if (!jump) + if (!jump) { node->flag |= jit_flag_head; + if (!node->link) { + /* Block is dead code or only reachable with an + * indirect jumps. In such condition, must assume + * all callee save registers are live. */ + block = _jitc->blocks.ptr + node->v.w; + jit_regset_ior(&block->reglive, + &block->reglive, &regmask); + /* Cleanup regmask */ + block_update_set(block, block); + } + } break; case jit_code_jmpi: case jit_code_jmpr: case jit_code_epilog: @@ -1932,6 +2005,10 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node) case jit_code_label: case jit_code_prolog: case jit_code_epilog: block = _jitc->blocks.ptr + node->v.w; jit_regset_set(&_jitc->reglive, &block->reglive); + jit_regset_set_ui(&_jitc->explive, 0); + break; + case jit_code_live: + jit_regset_setbit(&_jitc->explive, node->u.w); break; case jit_code_callr: value = jit_regno(node->u.w); @@ -2043,6 +2120,19 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value) else jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w)); } + /* Prevent incorrect detection of running out of registers + * if will need to patch jump, and all registers have been + * used in the current block. 
*/ + if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) { + jit_node_t *label = node->u.n; + jit_block_t *block = _jitc->blocks.ptr + label->v.w; + jit_regset_set(&_jitc->reglive, &block->reglive); + jit_regset_set(&_jitc->regmask, &block->regmask); + if (jit_regset_set_p(&_jitc->explive)) { + jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive); + jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive); + } + } } void @@ -2244,7 +2334,7 @@ _jit_emit(jit_state_t *_jit) #else if (!_jit->user_code) { mmap_prot = PROT_READ | PROT_WRITE; -#if !__OpenBSD__ +#if !(__OpenBSD__ || __APPLE__) mmap_prot |= PROT_EXEC; #endif #if __NetBSD__ @@ -2307,8 +2397,7 @@ _jit_emit(jit_state_t *_jit) # endif #else _jit->code.ptr = mmap(NULL, length, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, mmap_fd, 0); + mmap_prot, mmap_flags, mmap_fd, 0); #endif assert(_jit->code.ptr != MAP_FAILED); @@ -2340,12 +2429,12 @@ _jit_emit(jit_state_t *_jit) assert(result == 0); } if (!_jit->user_code) { - length = _jit->pc.uc - _jit->code.ptr; + _jit->code.protected = _jit->pc.uc - _jit->code.ptr; # if __riscv && __WORDSIZE == 64 /* FIXME should start adding consts at a page boundary */ - length -= _jitc->consts.hash.count * sizeof(jit_word_t); + _jit->code.protected -= _jitc->consts.hash.count * sizeof(jit_word_t); # endif - result = mprotect(_jit->code.ptr, length, PROT_READ | PROT_EXEC); + result = mprotect(_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC); assert(result == 0); } #endif /* HAVE_MMAP */ @@ -2355,6 +2444,32 @@ fail: return (NULL); } +void +_jit_protect(jit_state_t *_jit) +{ +#if !HAVE_MMAP + assert (_jit->user_code); +#else + int result; + if (_jit->user_code) return; + result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC); + assert (result == 0); +#endif +} + +void +_jit_unprotect(jit_state_t *_jit) +{ +#if !HAVE_MMAP + assert (_jit->user_code); +#else + int result; + if (_jit->user_code) 
return; + result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_WRITE); + assert (result == 0); +#endif +} + void _jit_frame(jit_state_t *_jit, jit_int32_t frame) { @@ -2786,6 +2901,9 @@ _jit_update(jit_state_t *_jit, jit_node_t *node, * to jump to unknown location. */ /* Treat all callee save as live. */ jit_regset_ior(live, live, mask); + /* Prevent explicitly set as live registers to + * be used as a temporary for the jmpi. */ + jit_regset_ior(live, live, &_jitc->explive); /* Treat anything else as dead. */ return; } @@ -2853,7 +2971,10 @@ _sequential_labels(jit_state_t *_jit) if ((jump = node->link)) { for (; jump; jump = link) { link = jump->link; - jump->u.n = prev; + if (jump->code == jit_code_movi) + jump->v.n = prev; + else + jump->u.n = prev; jump->link = prev->link; prev->link = jump; } @@ -2867,7 +2988,10 @@ _sequential_labels(jit_state_t *_jit) if ((jump = next->link)) { for (; jump; jump = link) { link = jump->link; - jump->u.n = node; + if (jump->code == jit_code_movi) + jump->v.n = node; + else + jump->u.n = node; jump->link = node->link; node->link = jump; } @@ -3022,7 +3146,6 @@ _redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node) } break; case jit_code_name: case jit_code_note: - case jit_code_align: break; default: return (0); @@ -3073,7 +3196,7 @@ reverse_jump_code(jit_code_t code) case jit_code_bgti_f: return (jit_code_bunlei_f); case jit_code_bner_f: return (jit_code_beqr_f); - case jit_code_bnei_f: return (jit_code_beqr_f); + case jit_code_bnei_f: return (jit_code_beqi_f); case jit_code_bunltr_f: return (jit_code_bger_f); case jit_code_bunlti_f: return (jit_code_bgei_f); @@ -3860,6 +3983,9 @@ static maybe_unused void generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1); #endif +#define patch_alist(revert) _patch_alist(_jit, revert) +static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert); + #if defined(__i386__) || defined(__x86_64__) # include "jit_x86.c" #elif 
defined(__mips__) @@ -3929,3 +4055,40 @@ generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1) jit_unget_reg(reg); } #endif + +#if defined(stack_framesize) +static maybe_unused void +_patch_alist(jit_state_t *_jit, jit_bool_t revert) +{ + jit_int32_t diff; + jit_node_t *node; + diff = jit_diffsize(); + if (diff) { + if (revert) + diff = -diff; + for (node = _jitc->function->alist; node; node = node->link) { + switch (node->code) { + case jit_code_ldxi_c: case jit_code_ldxi_uc: + case jit_code_ldxi_s: case jit_code_ldxi_us: + case jit_code_ldxi_i: +#if __WORDSIZE == 64 + case jit_code_ldxi_ui: case jit_code_ldxi_l: +#endif + case jit_code_ldxi_f: case jit_code_ldxi_d: + node->w.w -= diff; + break; + case jit_code_stxi_c: case jit_code_stxi_s: + case jit_code_stxi_i: +#if __WORDSIZE == 64 + case jit_code_stxi_l: +#endif + case jit_code_stxi_f: case jit_code_stxi_d: + node->u.w -= diff; + break; + default: + abort(); + } + } + } +} +#endif diff --git a/deps/lightning/size.c b/deps/lightning/size.c index 1728fb2e..1f31ed60 100644 --- a/deps/lightning/size.c +++ b/deps/lightning/size.c @@ -68,14 +68,6 @@ main(int argc, char *argv[]) # else fprintf(fp, "#if !defined(__ARM_PCS_VFP)\n"); # endif -#elif defined(__mips__) -# if __WORDSIZE == 32 -# if NEW_ABI - fprintf(fp, "#if NEW_ABI\n"); -# else - fprintf(fp, "#if !NEW_ABI\n"); -# endif -# endif #elif defined(__powerpc__) fprintf(fp, "#if defined(__powerpc__)\n"); fprintf(fp, "#if __BYTE_ORDER == %s\n", @@ -94,10 +86,6 @@ main(int argc, char *argv[]) fprintf(fp, " %d, /* %s */\n", _szs[offset], code_name[offset]); #if defined(__arm__) fprintf(fp, "#endif /* __ARM_PCS_VFP */\n"); -#elif defined(__mips__) -# if __WORDSIZE == 32 - fprintf(fp, "#endif /* NEW_ABI */\n"); -# endif #elif defined(__powerpc__) # if __WORDSIZE == 32 fprintf(fp, "#endif /* " diff --git a/deps/lightrec/.gitrepo b/deps/lightrec/.gitrepo index 6e8794f0..8a344c4f 100644 --- a/deps/lightrec/.gitrepo +++ b/deps/lightrec/.gitrepo @@ -6,7 +6,7 
@@ [subrepo] remote = https://github.com/pcercuei/lightrec.git branch = master - commit = 3ff589bcb7d52b3a091fe0b922ba02a0b1a7f095 - parent = aced3eb3fcaa0fe13c44c4dd196cdab42555fd98 + commit = fcf239e7e9d42fedb7a8de64057d6895acf3ceee + parent = 03ec8a8c606eb87642be336632e1792ab89650d8 method = merge cmdver = 0.4.3 diff --git a/deps/lightrec/CMakeLists.txt b/deps/lightrec/CMakeLists.txt index 12da14ea..9518a9ab 100644 --- a/deps/lightrec/CMakeLists.txt +++ b/deps/lightrec/CMakeLists.txt @@ -66,11 +66,11 @@ endif (ENABLE_THREADED_COMPILER) option(OPT_REMOVE_DIV_BY_ZERO_SEQ "(optimization) Remove div-by-zero check sequence" ON) option(OPT_REPLACE_MEMSET "(optimization) Detect and replace memset with host variant" ON) option(OPT_DETECT_IMPOSSIBLE_BRANCHES "(optimization) Detect impossible branches" ON) +option(OPT_HANDLE_LOAD_DELAYS "(optimization) Detect load delays" ON) option(OPT_TRANSFORM_OPS "(optimization) Transform opcodes" ON) option(OPT_LOCAL_BRANCHES "(optimization) Detect local branches" ON) option(OPT_SWITCH_DELAY_SLOTS "(optimization) Switch delay slots" ON) -option(OPT_FLAG_STORES "(optimization) Flag stores that don't require invalidation" ON) -option(OPT_FLAG_IO "(optimization) Flag I/O opcodes whose target is known" ON) +option(OPT_FLAG_IO "(optimization) Flag I/O opcodes when the target can be detected" ON) option(OPT_FLAG_MULT_DIV "(optimization) Flag MULT/DIV that only use one of HI/LO" ON) option(OPT_EARLY_UNLOAD "(optimization) Unload registers early" ON) diff --git a/deps/lightrec/README.md b/deps/lightrec/README.md index ab2c13b5..449e06c1 100644 --- a/deps/lightrec/README.md +++ b/deps/lightrec/README.md @@ -17,8 +17,7 @@ a form of Intermediate Representation (IR). Basically, just a single-linked list of structures representing the instructions. 
On that list, several optimization steps are performed: instructions are modified, reordered, tagged; new meta-instructions -can be added, for instance to tell the code generator that a certain -register won't be used anymore. +can also be added. * __Lazy compilation__. If Lightrec detects a block of code that would be very hard to @@ -46,10 +45,12 @@ typically happens when a lot of new code is run. Lightrec has been ported to the following emulators: -* [__PCSX-ReArmed__ (my own fork)](https://github.com/pcercuei/pcsx_rearmed) +* [__PCSX-ReArmed__ (libretro)](https://github.com/libretro/pcsx_rearmed) * [__pcsx4all__ (my own fork)](https://github.com/pcercuei/pcsx4all) * [__Beetle__ (libretro)](https://github.com/libretro/beetle-psx-libretro/) +* [__CubeSX/WiiSX__](https://github.com/emukidid/pcsxgc/) + [![Star History Chart](https://api.star-history.com/svg?repos=pcercuei/lightrec&type=Date)](https://star-history.com/#pcercuei/lightrec&Date) diff --git a/deps/lightrec/constprop.c b/deps/lightrec/constprop.c index 353f42f1..8499c6ec 100644 --- a/deps/lightrec/constprop.c +++ b/deps/lightrec/constprop.c @@ -243,12 +243,13 @@ static void lightrec_propagate_slt(u32 rs, u32 rd, bool is_signed, } } -void lightrec_consts_propagate(const struct opcode *list, +void lightrec_consts_propagate(const struct block *block, unsigned int idx, struct constprop_data *v) { + const struct opcode *list = block->opcode_list; union code c; - u32 imm; + u32 imm, flags; if (idx == 0) return; @@ -263,8 +264,13 @@ void lightrec_consts_propagate(const struct opcode *list, return; } - if (idx > 1 && !op_flag_sync(list[idx - 1].flags)) { - c = list[idx - 2].c; + flags = list[idx - 1].flags; + + if (idx > 1 && !op_flag_sync(flags)) { + if (op_flag_no_ds(flags)) + c = list[idx - 1].c; + else + c = list[idx - 2].c; switch (c.i.op) { case OP_BNE: @@ -449,6 +455,13 @@ void lightrec_consts_propagate(const struct opcode *list, v[c.r.rd].known = 0; v[c.r.rd].sign = 0; break; + + case OP_SPECIAL_JALR: + 
v[c.r.rd].known = 0xffffffff; + v[c.r.rd].sign = 0; + v[c.r.rd].value = block->pc + (idx + 2 << 2); + break; + default: break; } @@ -644,7 +657,7 @@ void lightrec_consts_propagate(const struct opcode *list, imm = imm ? GENMASK(31, 32 - imm) : 0; v[c.i.rt].sign = 0; } - v[c.i.rt].known &= ~imm; + v[c.i.rt].known &= imm; break; } fallthrough; @@ -652,30 +665,48 @@ void lightrec_consts_propagate(const struct opcode *list, v[c.i.rt].known = 0; v[c.i.rt].sign = 0; break; - case OP_META_MOV: - v[c.r.rd] = v[c.r.rs]; - break; - case OP_META_EXTC: - v[c.i.rt].value = (s32)(s8)v[c.i.rs].value; - if (v[c.i.rs].known & BIT(7)) { - v[c.i.rt].known = v[c.i.rs].known | 0xffffff00; - v[c.i.rt].sign = 0; - } else { - v[c.i.rt].known = v[c.i.rs].known & 0x7f; - v[c.i.rt].sign = 0xffffff80; - } - break; + case OP_META: + switch (c.m.op) { + case OP_META_MOV: + v[c.m.rd] = v[c.m.rs]; + break; - case OP_META_EXTS: - v[c.i.rt].value = (s32)(s16)v[c.i.rs].value; - if (v[c.i.rs].known & BIT(15)) { - v[c.i.rt].known = v[c.i.rs].known | 0xffff0000; - v[c.i.rt].sign = 0; - } else { - v[c.i.rt].known = v[c.i.rs].known & 0x7fff; - v[c.i.rt].sign = 0xffff8000; + case OP_META_EXTC: + v[c.m.rd].value = (s32)(s8)v[c.m.rs].value; + if (v[c.m.rs].known & BIT(7)) { + v[c.m.rd].known = v[c.m.rs].known | 0xffffff00; + v[c.m.rd].sign = 0; + } else { + v[c.m.rd].known = v[c.m.rs].known & 0x7f; + v[c.m.rd].sign = 0xffffff80; + } + break; + + case OP_META_EXTS: + v[c.m.rd].value = (s32)(s16)v[c.m.rs].value; + if (v[c.m.rs].known & BIT(15)) { + v[c.m.rd].known = v[c.m.rs].known | 0xffff0000; + v[c.m.rd].sign = 0; + } else { + v[c.m.rd].known = v[c.m.rs].known & 0x7fff; + v[c.m.rd].sign = 0xffff8000; + } + break; + + case OP_META_COM: + v[c.m.rd].known = v[c.m.rs].known; + v[c.m.rd].value = ~v[c.m.rs].value; + v[c.m.rd].sign = v[c.m.rs].sign; + break; + default: + break; } break; + case OP_JAL: + v[31].known = 0xffffffff; + v[31].sign = 0; + v[31].value = block->pc + (idx + 2 << 2); + break; default: break; 
diff --git a/deps/lightrec/constprop.h b/deps/lightrec/constprop.h index cebf0b38..9f9ecc3c 100644 --- a/deps/lightrec/constprop.h +++ b/deps/lightrec/constprop.h @@ -10,7 +10,7 @@ #define LIGHTREC_CONSTPROP_INITIALIZER { { 0, 0xffffffff, 0 }, } -struct opcode; +struct block; struct constprop_data { u32 value; @@ -34,7 +34,7 @@ static inline _Bool is_known_zero(const struct constprop_data *v, u8 reg) return bits_are_known_zero(v, reg, 0xffffffff); } -void lightrec_consts_propagate(const struct opcode *list, +void lightrec_consts_propagate(const struct block *block, unsigned int idx, struct constprop_data *v); diff --git a/deps/lightrec/disassembler.c b/deps/lightrec/disassembler.c index bef95948..f687d28c 100644 --- a/deps/lightrec/disassembler.c +++ b/deps/lightrec/disassembler.c @@ -120,6 +120,13 @@ static const char * const cp2_opcodes[] = { [OP_CP2_NCCT] = "ncct ", }; +static const char * const meta_opcodes[] = { + [OP_META_MOV] = "move ", + [OP_META_EXTC] = "extc ", + [OP_META_EXTS] = "exts ", + [OP_META_COM] = "com ", +}; + static const char * const mult2_opcodes[] = { "mult2 ", "multu2 ", }; @@ -133,6 +140,7 @@ static const char * const opcode_io_flags[] = { "self-modifying code", "no invalidation", "no mask", + "load delay", }; static const char * const opcode_io_modes[] = { @@ -444,18 +452,11 @@ static int print_op(union code c, u32 pc, char *buf, size_t len, lightrec_reg_name(c.i.rt), (s16)c.i.imm, lightrec_reg_name(c.i.rs)); - case OP_META_MOV: - return snprintf(buf, len, "move %s,%s", - lightrec_reg_name(c.r.rd), - lightrec_reg_name(c.r.rs)); - case OP_META_EXTC: - return snprintf(buf, len, "extc %s,%s", - lightrec_reg_name(c.i.rt), - lightrec_reg_name(c.i.rs)); - case OP_META_EXTS: - return snprintf(buf, len, "exts %s,%s", - lightrec_reg_name(c.i.rt), - lightrec_reg_name(c.i.rs)); + case OP_META: + return snprintf(buf, len, "%s%s,%s", + meta_opcodes[c.m.op], + lightrec_reg_name(c.m.rd), + lightrec_reg_name(c.m.rs)); case OP_META_MULT2: case 
OP_META_MULTU2: *flags_ptr = opcode_multdiv_flags; diff --git a/deps/lightrec/disassembler.h b/deps/lightrec/disassembler.h index e4685a9d..9e39484c 100644 --- a/deps/lightrec/disassembler.h +++ b/deps/lightrec/disassembler.h @@ -24,9 +24,10 @@ #define LIGHTREC_SMC BIT(2) #define LIGHTREC_NO_INVALIDATE BIT(3) #define LIGHTREC_NO_MASK BIT(4) +#define LIGHTREC_LOAD_DELAY BIT(5) /* I/O mode for load/store opcodes */ -#define LIGHTREC_IO_MODE_LSB 5 +#define LIGHTREC_IO_MODE_LSB 6 #define LIGHTREC_IO_MODE(x) ((x) << LIGHTREC_IO_MODE_LSB) #define LIGHTREC_IO_UNKNOWN 0x0 #define LIGHTREC_IO_DIRECT 0x1 @@ -107,10 +108,7 @@ enum standard_opcodes { OP_LWC2 = 0x32, OP_SWC2 = 0x3a, - OP_META_MOV = 0x16, - - OP_META_EXTC = 0x17, - OP_META_EXTS = 0x18, + OP_META = 0x3b, OP_META_MULT2 = 0x19, OP_META_MULTU2 = 0x1a, @@ -195,6 +193,15 @@ enum cp2_basic_opcodes { OP_CP2_BASIC_CTC2 = 0x06, }; +enum meta_opcodes { + OP_META_MOV = 0x00, + + OP_META_EXTC = 0x01, + OP_META_EXTS = 0x02, + + OP_META_COM = 0x03, +}; + struct opcode_r { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ u32 zero :6; @@ -237,12 +244,31 @@ struct opcode_j { #endif } __packed; +struct opcode_m { +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + u32 meta :6; + u32 rs :5; + u32 rt :5; + u32 rd :5; + u32 imm :6; + u32 op :5; +#else + u32 op :5; + u32 imm :6; + u32 rd :5; + u32 rt :5; + u32 rs :5; + u32 meta :6; +#endif +}; + union code { /* Keep in sync with struct opcode */ u32 opcode; struct opcode_r r; struct opcode_i i; struct opcode_j j; + struct opcode_m m; }; struct opcode { @@ -255,6 +281,7 @@ struct opcode { struct opcode_r r; struct opcode_i i; struct opcode_j j; + struct opcode_m m; }; u32 flags; }; @@ -278,13 +305,12 @@ static inline _Bool op_flag_sync(u32 flags) static inline _Bool op_flag_smc(u32 flags) { - return OPT_FLAG_STORES && (flags & LIGHTREC_SMC); + return OPT_FLAG_IO && (flags & LIGHTREC_SMC); } static inline _Bool op_flag_no_invalidate(u32 flags) { - return (OPT_FLAG_IO || OPT_FLAG_STORES) && - (flags 
& LIGHTREC_NO_INVALIDATE); + return OPT_FLAG_IO && (flags & LIGHTREC_NO_INVALIDATE); } static inline _Bool op_flag_no_mask(u32 flags) @@ -292,6 +318,11 @@ static inline _Bool op_flag_no_mask(u32 flags) return OPT_FLAG_IO && (flags & LIGHTREC_NO_MASK); } +static inline _Bool op_flag_load_delay(u32 flags) +{ + return OPT_HANDLE_LOAD_DELAYS && (flags & LIGHTREC_LOAD_DELAY); +} + static inline _Bool op_flag_emulate_branch(u32 flags) { return OPT_DETECT_IMPOSSIBLE_BRANCHES && diff --git a/deps/lightrec/emitter.c b/deps/lightrec/emitter.c index 14820e50..a6d43551 100644 --- a/deps/lightrec/emitter.c +++ b/deps/lightrec/emitter.c @@ -21,6 +21,7 @@ static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block static void rec_REGIMM(struct lightrec_cstate *state, const struct block *block, u16 offset); static void rec_CP0(struct lightrec_cstate *state, const struct block *block, u16 offset); static void rec_CP2(struct lightrec_cstate *state, const struct block *block, u16 offset); +static void rec_META(struct lightrec_cstate *state, const struct block *block, u16 offset); static void rec_cp2_do_mtc2(struct lightrec_cstate *state, const struct block *block, u16 offset, u8 reg, u8 in_reg); static void rec_cp2_do_mfc2(struct lightrec_cstate *state, @@ -35,12 +36,24 @@ static void unknown_opcode(struct lightrec_cstate *state, const struct block *bl } static void -lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit) +lightrec_jump_to_fn(jit_state_t *_jit, void (*fn)(void)) { /* Prevent jit_jmpi() from using our cycles register as a temporary */ jit_live(LIGHTREC_REG_CYCLE); - jit_patch_abs(jit_jmpi(), state->state->eob_wrapper_func); + jit_patch_abs(jit_jmpi(), fn); +} + +static void +lightrec_jump_to_eob(struct lightrec_cstate *state, jit_state_t *_jit) +{ + lightrec_jump_to_fn(_jit, state->state->eob_wrapper_func); +} + +static void +lightrec_jump_to_ds_check(struct lightrec_cstate *state, jit_state_t *_jit) +{ + lightrec_jump_to_fn(_jit, 
state->state->ds_check_func); } static void update_ra_register(struct regcache *reg_cache, jit_state_t *_jit, @@ -61,7 +74,7 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; const struct opcode *op = &block->opcode_list[offset], - *next = &block->opcode_list[offset + 1]; + *ds = get_delay_slot(block->opcode_list, offset); u32 cycles = state->cycles + lightrec_cycles_of_opcode(op->c); jit_note(__FILE__, __LINE__); @@ -83,10 +96,10 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, if (has_delay_slot(op->c) && !op_flag_no_ds(op->flags) && !op_flag_local_branch(op->flags)) { - cycles += lightrec_cycles_of_opcode(next->c); + cycles += lightrec_cycles_of_opcode(ds->c); /* Recompile the delay slot */ - if (next->c.opcode) + if (ds->c.opcode) lightrec_rec_opcode(state, block, offset + 1); } @@ -98,11 +111,41 @@ static void lightrec_emit_end_of_block(struct lightrec_cstate *state, pr_debug("EOB: %u cycles\n", cycles); } - lightrec_jump_to_eob(state, _jit); + if (op_flag_load_delay(ds->flags) + && opcode_is_load(ds->c) && !state->no_load_delay) { + /* If the delay slot is a load opcode, its target register + * will be written after the first opcode of the target is + * executed. Handle this by jumping to a special section of + * the dispatcher. 
It expects the loaded value to be in + * REG_TEMP, and the target register number to be in JIT_V1.*/ + jit_movi(JIT_V1, ds->c.i.rt); + + lightrec_jump_to_ds_check(state, _jit); + } else { + lightrec_jump_to_eob(state, _jit); + } } -void lightrec_emit_eob(struct lightrec_cstate *state, - const struct block *block, u16 offset) +void lightrec_emit_jump_to_interpreter(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + jit_state_t *_jit = block->_jit; + + lightrec_clean_regs(reg_cache, _jit); + + /* Call the interpreter with the block's address in JIT_V1 and the + * PC (which might have an offset) in JIT_V0. */ + lightrec_load_imm(reg_cache, _jit, JIT_V0, block->pc, + block->pc + (offset << 2)); + jit_movi(JIT_V1, (uintptr_t)block); + + jit_subi(LIGHTREC_REG_CYCLE, LIGHTREC_REG_CYCLE, state->cycles); + lightrec_jump_to_fn(_jit, state->state->interpreter_func); +} + +static void lightrec_emit_eob(struct lightrec_cstate *state, + const struct block *block, u16 offset) { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; @@ -198,9 +241,9 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 jit_state_t *_jit = block->_jit; struct lightrec_branch *branch; const struct opcode *op = &block->opcode_list[offset], - *next = &block->opcode_list[offset + 1]; + *ds = get_delay_slot(block->opcode_list, offset); jit_node_t *addr; - bool is_forward = (s16)op->i.imm >= -1; + bool is_forward = (s16)op->i.imm >= 0; int op_cycles = lightrec_cycles_of_opcode(op->c); u32 target_offset, cycles = state->cycles + op_cycles; bool no_indirection = false; @@ -210,7 +253,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 jit_note(__FILE__, __LINE__); if (!op_flag_no_ds(op->flags)) - cycles += lightrec_cycles_of_opcode(next->c); + cycles += lightrec_cycles_of_opcode(ds->c); state->cycles = -op_cycles; @@ -224,7 +267,7 @@ static void 
rec_b(struct lightrec_cstate *state, const struct block *block, u16 lightrec_do_early_unload(state, block, offset); if (op_flag_local_branch(op->flags) && - (op_flag_no_ds(op->flags) || !next->opcode) && + (op_flag_no_ds(op->flags) || !ds->opcode) && is_forward && !lightrec_has_dirty_regs(reg_cache)) no_indirection = true; @@ -246,8 +289,11 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 if (op_flag_local_branch(op->flags)) { /* Recompile the delay slot */ - if (!op_flag_no_ds(op->flags) && next->opcode) + if (!op_flag_no_ds(op->flags) && ds->opcode) { + /* Never handle load delays with local branches. */ + state->no_load_delay = true; lightrec_rec_opcode(state, block, offset + 1); + } if (link) update_ra_register(reg_cache, _jit, 31, block->pc, link); @@ -274,6 +320,7 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 if (!op_flag_local_branch(op->flags) || !is_forward) { next_pc = get_branch_pc(block, offset, 1 + (s16)op->i.imm); + state->no_load_delay = op_flag_local_branch(op->flags); lightrec_emit_end_of_block(state, block, offset, -1, next_pc, 31, link, false); } @@ -287,8 +334,10 @@ static void rec_b(struct lightrec_cstate *state, const struct block *block, u16 if (bz && link) update_ra_register(reg_cache, _jit, 31, block->pc, link); - if (!op_flag_no_ds(op->flags) && next->opcode) + if (!op_flag_no_ds(op->flags) && ds->opcode) { + state->no_load_delay = true; lightrec_rec_opcode(state, block, offset + 1); + } } } @@ -1090,6 +1139,7 @@ static void rec_io(struct lightrec_cstate *state, u32 flags = block->opcode_list[offset].flags; bool is_tagged = LIGHTREC_FLAGS_GET_IO_MODE(flags); u32 lut_entry; + u8 zero; jit_note(__FILE__, __LINE__); @@ -1100,6 +1150,16 @@ static void rec_io(struct lightrec_cstate *state, else if (load_rt) lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); + if (op_flag_load_delay(flags) && !state->no_load_delay) { + /* Clear state->in_delay_slot_n. 
This notifies the lightrec_rw + * wrapper that it should write the REG_TEMP register instead of + * the actual output register of the opcode. */ + zero = lightrec_alloc_reg_in(reg_cache, _jit, 0, 0); + jit_stxi_c(offsetof(struct lightrec_state, in_delay_slot_n), + LIGHTREC_REG_STATE, zero); + lightrec_free_reg(reg_cache, zero); + } + if (is_tagged) { call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_RW); } else { @@ -1143,7 +1203,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate, ((imm & 0x3) || simm + lut_offt != (s16)(simm + lut_offt)))); bool need_tmp = !no_mask || addr_offset || add_imm || invalidate; bool swc2 = c.i.op == OP_SWC2; - u8 in_reg = swc2 ? REG_CP2_TEMP : c.i.rt; + u8 in_reg = swc2 ? REG_TEMP : c.i.rt; rt = lightrec_alloc_reg_in(reg_cache, _jit, in_reg, 0); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); @@ -1202,7 +1262,7 @@ static void rec_store_memory(struct lightrec_cstate *cstate, if (addr_reg == rs && c.i.rs == 0) { addr_reg = LIGHTREC_REG_STATE; } else { - jit_addr(tmp, addr_reg, LIGHTREC_REG_STATE); + jit_add_state(tmp, addr_reg); addr_reg = tmp; } @@ -1268,14 +1328,15 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_end; bool swc2 = c.i.op == OP_SWC2; - u8 tmp, tmp2, rs, rt, in_reg = swc2 ? REG_CP2_TEMP : c.i.rt; + bool offset_ram_or_scratch = state->offset_ram || state->offset_scratch; + u8 tmp, tmp2, rs, rt, in_reg = swc2 ? 
REG_TEMP : c.i.rt; s16 imm; jit_note(__FILE__, __LINE__); rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - if (state->offset_ram || state->offset_scratch) + if (offset_ram_or_scratch) tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); /* Convert to KUNSEG and avoid RAM mirrors */ @@ -1307,7 +1368,7 @@ static void rec_store_direct_no_invalidate(struct lightrec_cstate *cstate, jit_movi(tmp2, state->offset_ram); } - if (state->offset_ram || state->offset_scratch) { + if (offset_ram_or_scratch) { jit_addr(tmp, tmp, tmp2); lightrec_free_reg(reg_cache, tmp2); } @@ -1340,7 +1401,7 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block jit_node_t *to_not_ram, *to_end; bool swc2 = c.i.op == OP_SWC2; u8 tmp, tmp2, tmp3, masked_reg, rs, rt; - u8 in_reg = swc2 ? REG_CP2_TEMP : c.i.rt; + u8 in_reg = swc2 ? REG_TEMP : c.i.rt; jit_note(__FILE__, __LINE__); @@ -1376,7 +1437,7 @@ static void rec_store_direct(struct lightrec_cstate *cstate, const struct block if (!lut_is_32bit(state)) jit_lshi(tmp, tmp, 1); - jit_addr(tmp, LIGHTREC_REG_STATE, tmp); + jit_add_state(tmp, tmp); /* Write NULL to the code LUT to invalidate any block that's there */ if (lut_is_32bit(state)) @@ -1437,7 +1498,7 @@ static void rec_store(struct lightrec_cstate *state, case LIGHTREC_IO_SCRATCH: case LIGHTREC_IO_DIRECT: case LIGHTREC_IO_DIRECT_HW: - rec_cp2_do_mfc2(state, block, offset, c.i.rt, REG_CP2_TEMP); + rec_cp2_do_mfc2(state, block, offset, c.i.rt, REG_TEMP); break; default: break; @@ -1469,7 +1530,7 @@ static void rec_store(struct lightrec_cstate *state, } if (is_swc2) - lightrec_discard_reg_if_loaded(state->reg_cache, REG_CP2_TEMP); + lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP); } static void rec_SB(struct lightrec_cstate *state, @@ -1519,14 +1580,15 @@ static void rec_load_memory(struct lightrec_cstate *cstate, { struct regcache *reg_cache = cstate->reg_cache; struct opcode *op = 
&block->opcode_list[offset]; + bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay; jit_state_t *_jit = block->_jit; u8 rs, rt, out_reg, addr_reg, flags = REG_EXT; bool no_mask = op_flag_no_mask(op->flags); union code c = op->c; s16 imm; - if (c.i.op == OP_LWC2) - out_reg = REG_CP2_TEMP; + if (load_delay || c.i.op == OP_LWC2) + out_reg = REG_TEMP; else if (c.i.rt) out_reg = c.i.rt; else @@ -1619,14 +1681,16 @@ static void rec_load_direct(struct lightrec_cstate *cstate, { struct lightrec_state *state = cstate->state; struct regcache *reg_cache = cstate->reg_cache; - union code c = block->opcode_list[offset].c; + struct opcode *op = &block->opcode_list[offset]; + bool load_delay = op_flag_load_delay(op->flags) && !cstate->no_load_delay; jit_state_t *_jit = block->_jit; jit_node_t *to_not_ram, *to_not_bios, *to_end, *to_end2; u8 tmp, rs, rt, out_reg, addr_reg, flags = REG_EXT; + union code c = op->c; s16 imm; - if (c.i.op == OP_LWC2) - out_reg = REG_CP2_TEMP; + if (load_delay || c.i.op == OP_LWC2) + out_reg = REG_TEMP; else if (c.i.rt) out_reg = c.i.rt; else @@ -1754,8 +1818,8 @@ static void rec_load(struct lightrec_cstate *state, const struct block *block, } if (op->i.op == OP_LWC2) { - rec_cp2_do_mtc2(state, block, offset, op->i.rt, REG_CP2_TEMP); - lightrec_discard_reg_if_loaded(state->reg_cache, REG_CP2_TEMP); + rec_cp2_do_mtc2(state, block, offset, op->i.rt, REG_TEMP); + lightrec_discard_reg_if_loaded(state->reg_cache, REG_TEMP); } } @@ -1827,6 +1891,15 @@ static void rec_break_syscall(struct lightrec_cstate *state, jit_stxi_i(offsetof(struct lightrec_state, exit_flags), LIGHTREC_REG_STATE, tmp); + jit_ldxi_i(tmp, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(tmp, tmp, LIGHTREC_REG_CYCLE); + jit_movi(LIGHTREC_REG_CYCLE, 0); + jit_stxi_i(offsetof(struct lightrec_state, target_cycle), + LIGHTREC_REG_STATE, tmp); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, tmp); + 
lightrec_free_reg(reg_cache, tmp); /* TODO: the return address should be "pc - 4" if we're a delay slot */ @@ -1872,6 +1945,7 @@ static void rec_mtc(struct lightrec_cstate *state, const struct block *block, u1 jit_note(__FILE__, __LINE__); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rs, false); lightrec_clean_reg_if_loaded(reg_cache, _jit, c.i.rt, false); + lightrec_clean_reg_if_loaded(reg_cache, _jit, REG_TEMP, false); call_to_c_wrapper(state, block, c.opcode, C_WRAPPER_MTC); @@ -1901,13 +1975,16 @@ rec_mfc0(struct lightrec_cstate *state, const struct block *block, u16 offset) lightrec_free_reg(reg_cache, rt); } -static bool block_in_bios(const struct lightrec_cstate *state, - const struct block *block) +static bool block_uses_icache(const struct lightrec_cstate *state, + const struct block *block) { - const struct lightrec_mem_map *bios = &state->state->maps[PSX_MAP_BIOS]; + const struct lightrec_mem_map *map = &state->state->maps[PSX_MAP_KERNEL_USER_RAM]; u32 pc = kunseg(block->pc); - return pc >= bios->pc && pc < bios->pc + bios->length; + if (pc < map->pc || pc >= map->pc + map->length) + return false; + + return (block->pc >> 28) < 0xa; } static void @@ -1933,11 +2010,11 @@ rec_mtc0(struct lightrec_cstate *state, const struct block *block, u16 offset) break; } - if (/*block_in_bios(state, block) &&*/ c.r.rd == 12) { - /* If we are running code from the BIOS, handle writes to the - * Status register in C. BIOS code may toggle bit 16 which will - * map/unmap the RAM, while game code cannot do that. */ - /* ^ wrong, it can execute from 0xa0000000 with isolated cache */ + if (!block_uses_icache(state, block) && c.r.rd == 12) { + /* If we are not running code from the RAM through kuseg or + * kseg0, handle writes to the Status register in C; as the + * code may toggle bit 16 which isolates the cache. Code + * running from kuseg or kseg0 in RAM cannot do that. 
*/ rec_mtc(state, block, offset); return; } @@ -2193,7 +2270,6 @@ static void rec_cp2_do_mtc2(struct lightrec_cstate *state, { struct regcache *reg_cache = state->reg_cache; jit_state_t *_jit = block->_jit; - jit_node_t *loop, *to_loop; u8 rt, tmp, tmp2, flags = 0; _jit_name(block->_jit, __func__); @@ -2246,30 +2322,20 @@ static void rec_cp2_do_mtc2(struct lightrec_cstate *state, break; case 30: tmp = lightrec_alloc_reg_temp(reg_cache, _jit); - tmp2 = lightrec_alloc_reg_temp(reg_cache, _jit); /* if (rt < 0) rt = ~rt; */ jit_rshi(tmp, rt, 31); jit_xorr(tmp, rt, tmp); - /* We know the sign bit is 0. Left-shift by 1 to start the algorithm */ - jit_lshi(tmp, tmp, 1); - jit_movi(tmp2, 33); - - /* Decrement tmp2 and right-shift the value by 1 until it equals zero */ - loop = jit_label(); - jit_subi(tmp2, tmp2, 1); - jit_rshi_u(tmp, tmp, 1); - to_loop = jit_bnei(tmp, 0); - - jit_patch_at(to_loop, loop); + /* Count leading zeros */ + jit_clzr(tmp, tmp); + if (__WORDSIZE != 32) + jit_subi(tmp, tmp, __WORDSIZE - 32); - jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp2); - jit_stxi_i(cp2d_i_offset(30), LIGHTREC_REG_STATE, rt); + jit_stxi_i(cp2d_i_offset(31), LIGHTREC_REG_STATE, tmp); lightrec_free_reg(reg_cache, tmp); - lightrec_free_reg(reg_cache, tmp2); - break; + fallthrough; default: jit_stxi_i(cp2d_i_offset(reg), LIGHTREC_REG_STATE, rt); break; @@ -2406,34 +2472,44 @@ static void rec_meta_MOV(struct lightrec_cstate *state, unload_rd = OPT_EARLY_UNLOAD && LIGHTREC_FLAGS_GET_RD(op->flags) == LIGHTREC_REG_UNLOAD; - if (c.r.rs || unload_rd) - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.r.rs, 0); + if (c.m.rs && !lightrec_reg_is_loaded(reg_cache, c.m.rs)) { + /* The source register is not yet loaded - we can load its value + * from the register cache directly into the target register. 
*/ + rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT); + + jit_ldxi_i(rd, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, regs.gpr) + (c.m.rs << 2)); - if (unload_rd) { + lightrec_free_reg(reg_cache, rd); + } else if (unload_rd) { /* If the destination register will be unloaded right after the * MOV meta-opcode, we don't actually need to write any host * register - we can just store the source register directly to * the register cache, at the offset corresponding to the * destination register. */ - lightrec_discard_reg_if_loaded(reg_cache, c.r.rd); + lightrec_discard_reg_if_loaded(reg_cache, c.m.rd); + + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); jit_stxi_i(offsetof(struct lightrec_state, regs.gpr) - + c.r.rd << 2, LIGHTREC_REG_STATE, rs); + + (c.m.rd << 2), LIGHTREC_REG_STATE, rs); lightrec_free_reg(reg_cache, rs); } else { - rd = lightrec_alloc_reg_out(reg_cache, _jit, c.r.rd, REG_EXT); + if (c.m.rs) + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); + + rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT); - if (c.r.rs == 0) + if (c.m.rs == 0) { jit_movi(rd, 0); - else + } else { jit_extr_i(rd, rs); + lightrec_free_reg(reg_cache, rs); + } lightrec_free_reg(reg_cache, rd); } - - if (c.r.rs || unload_rd) - lightrec_free_reg(reg_cache, rs); } static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state, @@ -2443,21 +2519,21 @@ static void rec_meta_EXTC_EXTS(struct lightrec_cstate *state, struct regcache *reg_cache = state->reg_cache; union code c = block->opcode_list[offset].c; jit_state_t *_jit = block->_jit; - u8 rs, rt; + u8 rs, rd; _jit_name(block->_jit, __func__); jit_note(__FILE__, __LINE__); - rs = lightrec_alloc_reg_in(reg_cache, _jit, c.i.rs, 0); - rt = lightrec_alloc_reg_out(reg_cache, _jit, c.i.rt, REG_EXT); + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); + rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, REG_EXT); - if (c.i.op == OP_META_EXTC) - jit_extr_c(rt, rs); + if (c.m.op == 
OP_META_EXTC) + jit_extr_c(rd, rs); else - jit_extr_s(rt, rs); + jit_extr_s(rd, rs); lightrec_free_reg(reg_cache, rs); - lightrec_free_reg(reg_cache, rt); + lightrec_free_reg(reg_cache, rd); } static void rec_meta_MULT2(struct lightrec_cstate *state, @@ -2524,6 +2600,29 @@ static void rec_meta_MULT2(struct lightrec_cstate *state, jit_note(__FILE__, __LINE__); } +static void rec_meta_COM(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + struct regcache *reg_cache = state->reg_cache; + union code c = block->opcode_list[offset].c; + jit_state_t *_jit = block->_jit; + u8 rd, rs, flags; + + jit_note(__FILE__, __LINE__); + rs = lightrec_alloc_reg_in(reg_cache, _jit, c.m.rs, 0); + rd = lightrec_alloc_reg_out(reg_cache, _jit, c.m.rd, 0); + + flags = lightrec_get_reg_in_flags(reg_cache, rs); + + lightrec_set_reg_out_flags(reg_cache, rd, + flags & REG_EXT); + + jit_comr(rd, rs); + + lightrec_free_reg(reg_cache, rs); + lightrec_free_reg(reg_cache, rd); +} + static const lightrec_rec_func_t rec_standard[64] = { SET_DEFAULT_ELM(rec_standard, unknown_opcode), [OP_SPECIAL] = rec_SPECIAL, @@ -2559,9 +2658,7 @@ static const lightrec_rec_func_t rec_standard[64] = { [OP_LWC2] = rec_LW, [OP_SWC2] = rec_SW, - [OP_META_MOV] = rec_meta_MOV, - [OP_META_EXTC] = rec_meta_EXTC_EXTS, - [OP_META_EXTS] = rec_meta_EXTC_EXTS, + [OP_META] = rec_META, [OP_META_MULT2] = rec_meta_MULT2, [OP_META_MULTU2] = rec_meta_MULT2, }; @@ -2623,6 +2720,14 @@ static const lightrec_rec_func_t rec_cp2_basic[64] = { [OP_CP2_BASIC_CTC2] = rec_cp2_basic_CTC2, }; +static const lightrec_rec_func_t rec_meta[64] = { + SET_DEFAULT_ELM(rec_meta, unknown_opcode), + [OP_META_MOV] = rec_meta_MOV, + [OP_META_EXTC] = rec_meta_EXTC_EXTS, + [OP_META_EXTS] = rec_meta_EXTC_EXTS, + [OP_META_COM] = rec_meta_COM, +}; + static void rec_SPECIAL(struct lightrec_cstate *state, const struct block *block, u16 offset) { @@ -2676,6 +2781,18 @@ static void rec_CP2(struct lightrec_cstate *state, rec_CP(state, block, 
offset); } +static void rec_META(struct lightrec_cstate *state, + const struct block *block, u16 offset) +{ + union code c = block->opcode_list[offset].c; + lightrec_rec_func_t f = rec_meta[c.m.op]; + + if (!HAS_DEFAULT_ELM && unlikely(!f)) + unknown_opcode(state, block, offset); + else + (*f)(state, block, offset); +} + void lightrec_rec_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset) { @@ -2715,4 +2832,6 @@ void lightrec_rec_opcode(struct lightrec_cstate *state, lightrec_do_early_unload(state, block, unload_offset); } + + state->no_load_delay = false; } diff --git a/deps/lightrec/emitter.h b/deps/lightrec/emitter.h index 4cbe8da6..c960a7fb 100644 --- a/deps/lightrec/emitter.h +++ b/deps/lightrec/emitter.h @@ -13,7 +13,7 @@ struct lightrec_cstate; struct opcode; void lightrec_rec_opcode(struct lightrec_cstate *state, const struct block *block, u16 offset); -void lightrec_emit_eob(struct lightrec_cstate *state, - const struct block *block, u16 offset); +void lightrec_emit_jump_to_interpreter(struct lightrec_cstate *state, + const struct block *block, u16 offset); #endif /* __EMITTER_H__ */ diff --git a/deps/lightrec/interpreter.c b/deps/lightrec/interpreter.c index ea8098cd..80a07f32 100644 --- a/deps/lightrec/interpreter.c +++ b/deps/lightrec/interpreter.c @@ -16,6 +16,7 @@ struct interpreter; static u32 int_CP0(struct interpreter *inter); static u32 int_CP2(struct interpreter *inter); static u32 int_SPECIAL(struct interpreter *inter); +static u32 int_META(struct interpreter *inter); static u32 int_REGIMM(struct interpreter *inter); static u32 int_branch(struct interpreter *inter, u32 pc, union code code, bool branch); @@ -45,7 +46,7 @@ static inline u32 int_get_ds_pc(const struct interpreter *inter, s16 imm) static inline struct opcode *next_op(const struct interpreter *inter) { - return &inter->block->opcode_list[inter->offset + 1]; + return &inter->op[1]; } static inline u32 execute(lightrec_int_func_t func, struct interpreter 
*inter) @@ -186,7 +187,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) * interpreter in that case. * Same goes for when we have a branch in a delay slot of another * branch. */ - load_in_ds = load_in_delay_slot(op->c); + load_in_ds = opcode_is_load(op->c) || opcode_is_mfc(op->c); branch_in_ds = has_delay_slot(op->c); if (branch) { @@ -241,6 +242,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) new_op.c = op_next; new_op.flags = 0; inter2.op = &new_op; + inter2.offset = 0; /* Execute the first opcode of the next block */ lightrec_int_op(&inter2); @@ -259,6 +261,7 @@ static u32 int_delay_slot(struct interpreter *inter, u32 pc, bool branch) inter2.block = inter->block; inter2.op = op; inter2.cycles = inter->cycles; + inter2.offset = inter->offset + 1; if (dummy_ld) new_rt = reg_cache[op->r.rt]; @@ -351,11 +354,6 @@ static u32 int_jumpr(struct interpreter *inter, u8 link_reg) u32 old_pc = int_get_branch_pc(inter); u32 next_pc = state->regs.gpr[inter->op->r.rs]; - if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { - inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); - return old_pc; - } - if (link_reg) state->regs.gpr[link_reg] = old_pc + 8; @@ -391,11 +389,6 @@ static u32 int_branch(struct interpreter *inter, u32 pc, { u32 next_pc = pc + 4 + ((s16)code.i.imm << 2); - if (op_flag_emulate_branch(inter->op->flags) && inter->offset) { - inter->cycles -= lightrec_cycles_of_opcode(inter->op->c); - return pc; - } - update_cycles_before_branch(inter); if (op_flag_no_ds(inter->op->flags)) { @@ -605,11 +598,14 @@ static u32 int_io(struct interpreter *inter, bool is_load) { struct opcode_i *op = &inter->op->i; u32 *reg_cache = inter->state->regs.gpr; - u32 val; + u32 val, *flags = NULL; + + if (inter->block) + flags = &inter->op->flags; val = lightrec_rw(inter->state, inter->op->c, reg_cache[op->rs], reg_cache[op->rt], - &inter->op->flags, inter->block); + flags, inter->block, inter->offset); if 
(is_load && op->rt) reg_cache[op->rt] = val; @@ -632,7 +628,7 @@ static u32 int_store(struct interpreter *inter) lightrec_rw(inter->state, inter->op->c, inter->state->regs.gpr[inter->op->i.rs], inter->state->regs.gpr[inter->op->i.rt], - &inter->op->flags, inter->block); + &inter->op->flags, inter->block, inter->offset); next_pc = int_get_ds_pc(inter, 1); @@ -717,9 +713,9 @@ static u32 int_syscall_break(struct interpreter *inter) { if (inter->op->r.op == OP_SPECIAL_BREAK) - inter->state->exit_flags |= LIGHTREC_EXIT_BREAK; + lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_BREAK); else - inter->state->exit_flags |= LIGHTREC_EXIT_SYSCALL; + lightrec_set_exit_flags(inter->state, LIGHTREC_EXIT_SYSCALL); return int_get_ds_pc(inter, 0); } @@ -955,7 +951,7 @@ static u32 int_special_SLTU(struct interpreter *inter) static u32 int_META_MOV(struct interpreter *inter) { u32 *reg_cache = inter->state->regs.gpr; - struct opcode_r *op = &inter->op->r; + struct opcode_m *op = &inter->op->m; if (likely(op->rd)) reg_cache[op->rd] = reg_cache[op->rs]; @@ -966,10 +962,10 @@ static u32 int_META_MOV(struct interpreter *inter) static u32 int_META_EXTC(struct interpreter *inter) { u32 *reg_cache = inter->state->regs.gpr; - struct opcode_i *op = &inter->op->i; + struct opcode_m *op = &inter->op->m; - if (likely(op->rt)) - reg_cache[op->rt] = (u32)(s32)(s8)reg_cache[op->rs]; + if (likely(op->rd)) + reg_cache[op->rd] = (u32)(s32)(s8)reg_cache[op->rs]; return jump_next(inter); } @@ -977,10 +973,10 @@ static u32 int_META_EXTC(struct interpreter *inter) static u32 int_META_EXTS(struct interpreter *inter) { u32 *reg_cache = inter->state->regs.gpr; - struct opcode_i *op = &inter->op->i; + struct opcode_m *op = &inter->op->m; - if (likely(op->rt)) - reg_cache[op->rt] = (u32)(s32)(s16)reg_cache[op->rs]; + if (likely(op->rd)) + reg_cache[op->rd] = (u32)(s32)(s16)reg_cache[op->rs]; return jump_next(inter); } @@ -1012,6 +1008,17 @@ static u32 int_META_MULT2(struct interpreter *inter) return 
jump_next(inter); } +static u32 int_META_COM(struct interpreter *inter) +{ + u32 *reg_cache = inter->state->regs.gpr; + union code c = inter->op->c; + + if (likely(c.m.rd)) + reg_cache[c.m.rd] = ~reg_cache[c.m.rs]; + + return jump_next(inter); +} + static const lightrec_int_func_t int_standard[64] = { SET_DEFAULT_ELM(int_standard, int_unimplemented), [OP_SPECIAL] = int_SPECIAL, @@ -1047,9 +1054,7 @@ static const lightrec_int_func_t int_standard[64] = { [OP_LWC2] = int_LWC2, [OP_SWC2] = int_store, - [OP_META_MOV] = int_META_MOV, - [OP_META_EXTC] = int_META_EXTC, - [OP_META_EXTS] = int_META_EXTS, + [OP_META] = int_META, [OP_META_MULT2] = int_META_MULT2, [OP_META_MULTU2] = int_META_MULT2, }; @@ -1111,6 +1116,14 @@ static const lightrec_int_func_t int_cp2_basic[64] = { [OP_CP2_BASIC_CTC2] = int_ctc, }; +static const lightrec_int_func_t int_meta[64] = { + SET_DEFAULT_ELM(int_meta, int_unimplemented), + [OP_META_MOV] = int_META_MOV, + [OP_META_EXTC] = int_META_EXTC, + [OP_META_EXTS] = int_META_EXTS, + [OP_META_COM] = int_META_COM, +}; + static u32 int_SPECIAL(struct interpreter *inter) { lightrec_int_func_t f = int_special[inter->op->r.op]; @@ -1152,6 +1165,16 @@ static u32 int_CP2(struct interpreter *inter) return int_CP(inter); } +static u32 int_META(struct interpreter *inter) +{ + lightrec_int_func_t f = int_meta[inter->op->m.op]; + + if (!HAS_DEFAULT_ELM && unlikely(!f)) + return int_unimplemented(inter); + + return execute(f, inter); +} + static u32 lightrec_emulate_block_list(struct lightrec_state *state, struct block *block, u32 offset) { @@ -1188,3 +1211,75 @@ u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u3 return 0; } + +static u32 branch_get_next_pc(struct lightrec_state *state, union code c, u32 pc) +{ + switch (c.i.op) { + case OP_SPECIAL: + /* JR / JALR */ + return state->regs.gpr[c.r.rs]; + case OP_J: + case OP_JAL: + return (pc & 0xf0000000) | (c.j.imm << 2); + default: + /* Branch opcodes */ + return pc + 4 + ((s16)c.i.imm 
<< 2); + } +} + +u32 lightrec_handle_load_delay(struct lightrec_state *state, + struct block *block, u32 pc, u32 reg) +{ + union code c = lightrec_read_opcode(state, pc); + struct opcode op[2] = { + { + .c = c, + .flags = 0, + }, + { + .flags = 0, + }, + }; + struct interpreter inter = { + .block = block, + .state = state, + .offset = 0, + .op = op, + .cycles = 0, + }; + bool branch_taken; + u32 reg_mask, next_pc; + + if (has_delay_slot(c)) { + op[1].c = lightrec_read_opcode(state, pc + 4); + + branch_taken = is_branch_taken(state->regs.gpr, c); + next_pc = branch_get_next_pc(state, c, pc); + + /* Branch was evaluated, we can write the load opcode's target + * register now. */ + state->regs.gpr[reg] = state->temp_reg; + + /* Handle JALR / regimm opcodes setting $ra (or any other + * register in the case of JALR) */ + reg_mask = (u32)opcode_write_mask(c); + if (reg_mask) + state->regs.gpr[ctz32(reg_mask)] = pc + 8; + + /* Handle delay slot of the branch opcode */ + pc = int_delay_slot(&inter, next_pc, branch_taken); + } else { + /* Make sure we only run one instruction */ + inter.delay_slot = true; + + lightrec_int_op(&inter); + pc += 4; + + if (!opcode_writes_register(c, reg)) + state->regs.gpr[reg] = state->temp_reg; + } + + state->current_cycle += inter.cycles; + + return pc; +} diff --git a/deps/lightrec/interpreter.h b/deps/lightrec/interpreter.h index 96600bfc..51c53906 100644 --- a/deps/lightrec/interpreter.h +++ b/deps/lightrec/interpreter.h @@ -11,5 +11,7 @@ struct block; u32 lightrec_emulate_block(struct lightrec_state *state, struct block *block, u32 pc); +u32 lightrec_handle_load_delay(struct lightrec_state *state, + struct block *block, u32 pc, u32 reg); #endif /* __LIGHTREC_INTERPRETER_H__ */ diff --git a/deps/lightrec/lightning-wrapper.h b/deps/lightrec/lightning-wrapper.h index b0e8bf3b..4cb97d3a 100644 --- a/deps/lightrec/lightning-wrapper.h +++ b/deps/lightrec/lightning-wrapper.h @@ -21,4 +21,14 @@ #define jit_b() jit_beqr(0, 0) +#if 
defined(__sh__) +#define jit_add_state(u,v) \ + do { \ + jit_new_node_ww(jit_code_movr,_R0,LIGHTREC_REG_STATE); \ + jit_new_node_www(jit_code_addr,u,v,_R0); \ + } while (0) +#else +#define jit_add_state(u,v) jit_addr(u,v,LIGHTREC_REG_STATE) +#endif + #endif /* __LIGHTNING_WRAPPER_H__ */ diff --git a/deps/lightrec/lightrec-config.h.cmakein b/deps/lightrec/lightrec-config.h.cmakein index 11886653..ed29ee4d 100644 --- a/deps/lightrec/lightrec-config.h.cmakein +++ b/deps/lightrec/lightrec-config.h.cmakein @@ -16,10 +16,10 @@ #cmakedefine01 OPT_REMOVE_DIV_BY_ZERO_SEQ #cmakedefine01 OPT_REPLACE_MEMSET #cmakedefine01 OPT_DETECT_IMPOSSIBLE_BRANCHES +#cmakedefine01 OPT_HANDLE_LOAD_DELAYS #cmakedefine01 OPT_TRANSFORM_OPS #cmakedefine01 OPT_LOCAL_BRANCHES #cmakedefine01 OPT_SWITCH_DELAY_SLOTS -#cmakedefine01 OPT_FLAG_STORES #cmakedefine01 OPT_FLAG_IO #cmakedefine01 OPT_FLAG_MULT_DIV #cmakedefine01 OPT_EARLY_UNLOAD diff --git a/deps/lightrec/lightrec-private.h b/deps/lightrec/lightrec-private.h index e67d406f..12e953a2 100644 --- a/deps/lightrec/lightrec-private.h +++ b/deps/lightrec/lightrec-private.h @@ -81,7 +81,7 @@ #define REG_LO 32 #define REG_HI 33 -#define REG_CP2_TEMP (offsetof(struct lightrec_state, cp2_temp_reg) / sizeof(u32)) +#define REG_TEMP (offsetof(struct lightrec_state, temp_reg) / sizeof(u32)) /* Definition of jit_state_t (avoids inclusion of ) */ struct jit_node; @@ -149,13 +149,16 @@ struct lightrec_cstate { unsigned int cycles; struct regcache *reg_cache; + + _Bool no_load_delay; }; struct lightrec_state { struct lightrec_registers regs; - u32 cp2_temp_reg; + u32 temp_reg; u32 next_pc; uintptr_t wrapper_regs[NUM_TEMPS]; + u8 in_delay_slot_n; u32 current_cycle; u32 target_cycle; u32 exit_flags; @@ -169,10 +172,13 @@ struct lightrec_state { struct reaper *reaper; void *tlsf; void (*eob_wrapper_func)(void); + void (*interpreter_func)(void); + void (*ds_check_func)(void); void (*memset_func)(void); void (*get_next_block)(void); struct lightrec_ops ops; 
unsigned int nb_precompile; + unsigned int nb_compile; unsigned int nb_maps; const struct lightrec_mem_map *maps; uintptr_t offset_ram, offset_bios, offset_scratch, offset_io; @@ -182,9 +188,8 @@ struct lightrec_state { void *code_lut[]; }; -u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u32 *flags, - struct block *block); +u32 lightrec_rw(struct lightrec_state *state, union code op, u32 addr, + u32 data, u32 *flags, struct block *block, u16 offset); void lightrec_free_block(struct lightrec_state *state, struct block *block); @@ -285,7 +290,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, struct block *block); void lightrec_free_opcode_list(struct lightrec_state *state, struct opcode *list); -unsigned int lightrec_cycles_of_opcode(union code code); +__cnst unsigned int lightrec_cycles_of_opcode(union code code); static inline u8 get_mult_div_lo(union code c) { @@ -349,4 +354,10 @@ static inline _Bool can_zero_extend(u32 value, u8 order) return (value >> order) == 0; } +static inline const struct opcode * +get_delay_slot(const struct opcode *list, u16 i) +{ + return op_flag_no_ds(list[i].flags) ? 
&list[i - 1] : &list[i + 1]; +} + #endif /* __LIGHTREC_PRIVATE_H__ */ diff --git a/deps/lightrec/lightrec.c b/deps/lightrec/lightrec.c index b9e82fb2..d5b1de96 100644 --- a/deps/lightrec/lightrec.c +++ b/deps/lightrec/lightrec.c @@ -237,26 +237,43 @@ lightrec_get_map(struct lightrec_state *state, void **host, u32 kaddr) return map; } -u32 lightrec_rw(struct lightrec_state *state, union code op, - u32 addr, u32 data, u32 *flags, struct block *block) +u32 lightrec_rw(struct lightrec_state *state, union code op, u32 base, + u32 data, u32 *flags, struct block *block, u16 offset) { const struct lightrec_mem_map *map; const struct lightrec_mem_map_ops *ops; u32 opcode = op.opcode; + bool was_tagged = true; + u16 old_flags; + u32 addr; void *host; - addr += (s16) op.i.imm; + addr = kunseg(base + (s16) op.i.imm); - map = lightrec_get_map(state, &host, kunseg(addr)); + map = lightrec_get_map(state, &host, addr); if (!map) { __segfault_cb(state, addr, block); return 0; } + if (flags) + was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(*flags); if (likely(!map->ops)) { - if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) - *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + if (flags && !LIGHTREC_FLAGS_GET_IO_MODE(*flags)) { + /* Force parallel port accesses as HW accesses, because + * the direct-I/O emitters can't differenciate it. */ + if (unlikely(map == &state->maps[PSX_MAP_PARALLEL_PORT])) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + /* If the base register is 0x0, be extra suspicious. + * Some games (e.g. Sled Storm) actually do segmentation + * faults by using uninitialized pointers, which are + * later initialized to point to hardware registers. 
*/ + else if (op.i.rs && base == 0x0) + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_HW); + else + *flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } ops = &lightrec_default_ops; } else if (flags && @@ -269,6 +286,17 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, ops = map->ops; } + if (!was_tagged) { + old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); + + if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { + pr_debug("Opcode of block at PC 0x%08x has been tagged" + " - flag for recompilation\n", block->pc); + + lut_write(state, lut_offset(block->pc), NULL); + } + } + switch (op.i.op) { case OP_SB: ops->sb(state, opcode, host, addr, (u8) data); @@ -311,10 +339,10 @@ u32 lightrec_rw(struct lightrec_state *state, union code op, static void lightrec_rw_helper(struct lightrec_state *state, union code op, u32 *flags, - struct block *block) + struct block *block, u16 offset) { u32 ret = lightrec_rw(state, op, state->regs.gpr[op.i.rs], - state->regs.gpr[op.i.rt], flags, block); + state->regs.gpr[op.i.rt], flags, block, offset); switch (op.i.op) { case OP_LB: @@ -324,8 +352,12 @@ static void lightrec_rw_helper(struct lightrec_state *state, case OP_LWL: case OP_LWR: case OP_LW: - if (op.i.rt) + if (OPT_HANDLE_LOAD_DELAYS && unlikely(!state->in_delay_slot_n)) { + state->temp_reg = ret; + state->in_delay_slot_n = 0xff; + } else if (op.i.rt) { state->regs.gpr[op.i.rt] = ret; + } fallthrough; default: break; @@ -334,16 +366,14 @@ static void lightrec_rw_helper(struct lightrec_state *state, static void lightrec_rw_cb(struct lightrec_state *state, u32 arg) { - lightrec_rw_helper(state, (union code) arg, NULL, NULL); + lightrec_rw_helper(state, (union code) arg, NULL, NULL, 0); } static void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) { struct block *block; struct opcode *op; - bool was_tagged; u16 offset = (u16)arg; - u16 old_flags; block = lightrec_find_block_from_lut(state->block_cache, arg >> 16, state->next_pc); @@ -355,20 +385,7 @@ static 
void lightrec_rw_generic_cb(struct lightrec_state *state, u32 arg) } op = &block->opcode_list[offset]; - was_tagged = LIGHTREC_FLAGS_GET_IO_MODE(op->flags); - - lightrec_rw_helper(state, op->c, &op->flags, block); - - if (!was_tagged) { - old_flags = block_set_flags(block, BLOCK_SHOULD_RECOMPILE); - - if (!(old_flags & BLOCK_SHOULD_RECOMPILE)) { - pr_debug("Opcode of block at PC 0x%08x has been tagged" - " - flag for recompilation\n", block->pc); - - lut_write(state, lut_offset(block->pc), NULL); - } - } + lightrec_rw_helper(state, op->c, &op->flags, block, offset); } static u32 clamp_s32(s32 val, s32 min, s32 max) @@ -462,7 +479,7 @@ static void lightrec_mfc_cb(struct lightrec_state *state, union code op) u32 rt = lightrec_mfc(state, op); if (op.i.op == OP_SWC2) - state->cp2_temp_reg = rt; + state->temp_reg = rt; else if (op.r.rt) state->regs.gpr[op.r.rt] = rt; } @@ -603,7 +620,7 @@ static void lightrec_mtc_cb(struct lightrec_state *state, u32 arg) u8 reg; if (op.i.op == OP_LWC2) { - data = state->cp2_temp_reg; + data = state->temp_reg; reg = op.i.rt; } else { data = state->regs.gpr[op.r.rt]; @@ -703,6 +720,7 @@ static void * get_next_block_func(struct lightrec_state *state, u32 pc) } should_recompile = block_has_flag(block, BLOCK_SHOULD_RECOMPILE) && + !block_has_flag(block, BLOCK_NEVER_COMPILE) && !block_has_flag(block, BLOCK_IS_DEAD); if (unlikely(should_recompile)) { @@ -803,6 +821,8 @@ static void lightrec_free_code(struct lightrec_state *state, void *ptr) lightrec_code_alloc_unlock(state); } +static char lightning_code_data[0x80000]; + static void * lightrec_emit_code(struct lightrec_state *state, const struct block *block, jit_state_t *_jit, unsigned int *size) @@ -813,7 +833,9 @@ static void * lightrec_emit_code(struct lightrec_state *state, jit_realize(); - if (!ENABLE_DISASSEMBLER) + if (ENABLE_DISASSEMBLER) + jit_set_data(lightning_code_data, sizeof(lightning_code_data), 0); + else jit_set_data(NULL, 0, JIT_DISABLE_DATA | JIT_DISABLE_NOTE); if 
(has_code_buffer) { @@ -872,6 +894,15 @@ static struct block * generate_wrapper(struct lightrec_state *state) unsigned int i; jit_node_t *addr[C_WRAPPERS_COUNT - 1]; jit_node_t *to_end[C_WRAPPERS_COUNT - 1]; + u8 tmp = JIT_R1; + +#ifdef __sh__ + /* On SH, GBR-relative loads target the r0 register. + * Use it as the temporary register to factorize the move to + * JIT_R1. */ + if (LIGHTREC_REG_STATE == _GBR) + tmp = _R0; +#endif block = lightrec_malloc(state, MEM_FOR_IR, sizeof(*block)); if (!block) @@ -890,17 +921,18 @@ static struct block * generate_wrapper(struct lightrec_state *state) /* Add entry points */ for (i = C_WRAPPERS_COUNT - 1; i > 0; i--) { - jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + jit_ldxi(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, c_wrappers[i])); to_end[i - 1] = jit_b(); addr[i - 1] = jit_indirect(); } - jit_ldxi(JIT_R1, LIGHTREC_REG_STATE, + jit_ldxi(tmp, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, c_wrappers[0])); for (i = 0; i < C_WRAPPERS_COUNT - 1; i++) jit_patch(to_end[i]); + jit_movr(JIT_R1, tmp); jit_epilog(); jit_prolog(); @@ -1002,11 +1034,54 @@ static u32 lightrec_memset(struct lightrec_state *state) return 8 + 5 * (length + 3 / 4); } +static u32 lightrec_check_load_delay(struct lightrec_state *state, u32 pc, u8 reg) +{ + struct block *block; + union code first_op; + + first_op = lightrec_read_opcode(state, pc); + + if (likely(!opcode_reads_register(first_op, reg))) { + state->regs.gpr[reg] = state->temp_reg; + } else { + block = lightrec_get_block(state, pc); + if (unlikely(!block)) { + pr_err("Unable to get block at PC 0x%08x\n", pc); + lightrec_set_exit_flags(state, LIGHTREC_EXIT_SEGFAULT); + pc = 0; + } else { + pc = lightrec_handle_load_delay(state, block, pc, reg); + } + } + + return pc; +} + +static void update_cycle_counter_before_c(jit_state_t *_jit) +{ + /* update state->current_cycle */ + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(JIT_R1, JIT_R2, 
LIGHTREC_REG_CYCLE); + jit_stxi_i(offsetof(struct lightrec_state, current_cycle), + LIGHTREC_REG_STATE, JIT_R1); +} + +static void update_cycle_counter_after_c(jit_state_t *_jit) +{ + /* Recalc the delta */ + jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, current_cycle)); + jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, + offsetof(struct lightrec_state, target_cycle)); + jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); +} + static struct block * generate_dispatcher(struct lightrec_state *state) { struct block *block; jit_state_t *_jit; - jit_node_t *to_end, *loop, *addr, *addr2, *addr3; + jit_node_t *to_end, *loop, *addr, *addr2, *addr3, *addr4, *addr5, *jmp, *jmp2; unsigned int i; u32 offset; @@ -1047,13 +1122,70 @@ static struct block * generate_dispatcher(struct lightrec_state *state) jit_prepare(); jit_pushargr(LIGHTREC_REG_STATE); + jit_finishi(lightrec_memset); + jit_retval(LIGHTREC_REG_CYCLE); jit_ldxi_ui(JIT_V0, LIGHTREC_REG_STATE, offsetof(struct lightrec_state, regs.gpr[31])); - - jit_retval(LIGHTREC_REG_CYCLE); jit_subr(LIGHTREC_REG_CYCLE, JIT_V1, LIGHTREC_REG_CYCLE); + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS) + jmp = jit_b(); + } + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) { + /* Blocks will jump here when they reach a branch that should + * be executed with the interpreter, passing the branch's PC + * in JIT_V0 and the address of the block in JIT_V1. */ + addr4 = jit_indirect(); + + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V1); + jit_pushargr(JIT_V0); + jit_finishi(lightrec_emulate_block); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + if (OPT_HANDLE_LOAD_DELAYS) + jmp2 = jit_b(); + + } + + if (OPT_HANDLE_LOAD_DELAYS) { + /* Blocks will jump here when they reach a branch with a load + * opcode in its delay slot. 
The delay slot has already been + * executed; the load value is in (state->temp_reg), and the + * register number is in JIT_V1. + * Jump to a C function which will evaluate the branch target's + * first opcode, to make sure that it does not read the register + * in question; and if it does, handle it accordingly. */ + addr5 = jit_indirect(); + + update_cycle_counter_before_c(_jit); + + jit_prepare(); + jit_pushargr(LIGHTREC_REG_STATE); + jit_pushargr(JIT_V0); + jit_pushargr(JIT_V1); + jit_finishi(lightrec_check_load_delay); + + jit_retval(JIT_V0); + + update_cycle_counter_after_c(_jit); + + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) + jit_patch(jmp2); + } + + if (OPT_REPLACE_MEMSET + && (OPT_DETECT_IMPOSSIBLE_BRANCHES || OPT_HANDLE_LOAD_DELAYS)) { + jit_patch(jmp); } /* The block will jump here, with the number of cycles remaining in @@ -1077,7 +1209,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) /* If possible, use the code LUT */ if (!lut_is_32bit(state)) jit_lshi(JIT_V1, JIT_V1, 1); - jit_addr(JIT_V1, JIT_V1, LIGHTREC_REG_STATE); + jit_add_state(JIT_V1, JIT_V1); offset = offsetof(struct lightrec_state, code_lut); if (lut_is_32bit(state)) @@ -1097,11 +1229,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* We may call the interpreter - update state->current_cycle */ - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); - jit_subr(JIT_V1, JIT_R2, LIGHTREC_REG_CYCLE); - jit_stxi_i(offsetof(struct lightrec_state, current_cycle), - LIGHTREC_REG_STATE, JIT_V1); + update_cycle_counter_before_c(_jit); } jit_prepare(); @@ -1119,11 +1247,7 @@ static struct block * generate_dispatcher(struct lightrec_state *state) if (ENABLE_FIRST_PASS || OPT_DETECT_IMPOSSIBLE_BRANCHES) { /* The interpreter may have updated state->current_cycle and * state->target_cycle - recalc the delta */ - jit_ldxi_i(JIT_R1, LIGHTREC_REG_STATE, - 
offsetof(struct lightrec_state, current_cycle)); - jit_ldxi_i(JIT_R2, LIGHTREC_REG_STATE, - offsetof(struct lightrec_state, target_cycle)); - jit_subr(LIGHTREC_REG_CYCLE, JIT_R2, JIT_R1); + update_cycle_counter_after_c(_jit); } else { jit_movr(LIGHTREC_REG_CYCLE, JIT_V0); } @@ -1153,6 +1277,10 @@ static struct block * generate_dispatcher(struct lightrec_state *state) goto err_free_block; state->eob_wrapper_func = jit_address(addr2); + if (OPT_DETECT_IMPOSSIBLE_BRANCHES) + state->interpreter_func = jit_address(addr4); + if (OPT_HANDLE_LOAD_DELAYS) + state->ds_check_func = jit_address(addr5); if (OPT_REPLACE_MEMSET) state->memset_func = jit_address(addr3); state->get_next_block = jit_address(addr); @@ -1183,7 +1311,7 @@ union code lightrec_read_opcode(struct lightrec_state *state, u32 pc) return (union code) LE32TOH(*code); } -unsigned int lightrec_cycles_of_opcode(union code code) +__cnst unsigned int lightrec_cycles_of_opcode(union code code) { return 2; } @@ -1291,11 +1419,6 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, pr_debug("Block size: %hu opcodes\n", block->nb_ops); - /* If the first opcode is an 'impossible' branch, never compile the - * block */ - if (should_emulate(block->opcode_list)) - block_flags |= BLOCK_NEVER_COMPILE; - fully_tagged = lightrec_block_is_fully_tagged(block); if (fully_tagged) block_flags |= BLOCK_FULLY_TAGGED; @@ -1311,7 +1434,7 @@ static struct block * lightrec_precompile_block(struct lightrec_state *state, addr = state->get_next_block; lut_write(state, lut_offset(pc), addr); - pr_debug("Recompile count: %u\n", state->nb_precompile++); + pr_debug("Blocks created: %u\n", ++state->nb_precompile); return block; } @@ -1324,8 +1447,12 @@ static bool lightrec_block_is_fully_tagged(const struct block *block) for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; - /* Verify that all load/stores of the opcode list - * Check all loads/stores of the opcode list and mark the + /* If we have one 
branch that must be emulated, we cannot trash + * the opcode list. */ + if (should_emulate(op)) + return false; + + /* Check all loads/stores of the opcode list and mark the * block as fully compiled if they all have been tagged. */ switch (op->c.i.op) { case OP_LB: @@ -1421,6 +1548,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, cstate->cycles = 0; cstate->nb_local_branches = 0; cstate->nb_targets = 0; + cstate->no_load_delay = false; jit_prolog(); jit_tramp(256); @@ -1439,7 +1567,7 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, pr_debug("Branch at offset 0x%x will be emulated\n", i << 2); - lightrec_emit_eob(cstate, block, i); + lightrec_emit_jump_to_interpreter(cstate, block, i); skip_next = !op_flag_no_ds(elm->flags); } else { lightrec_rec_opcode(cstate, block, i); @@ -1603,6 +1731,8 @@ int lightrec_compile_block(struct lightrec_cstate *cstate, lightrec_unregister(MEM_FOR_CODE, old_code_size); } + pr_debug("Blocks compiled: %u\n", ++state->nb_compile); + return 0; } @@ -1775,6 +1905,7 @@ struct lightrec_state * lightrec_init(char *argv0, state->tlsf = tlsf; state->with_32bit_lut = with_32bit_lut; + state->in_delay_slot_n = 0xff; state->block_cache = lightrec_blockcache_init(state); if (!state->block_cache) diff --git a/deps/lightrec/lightrec.h b/deps/lightrec/lightrec.h index 9cd7f478..bd878c86 100644 --- a/deps/lightrec/lightrec.h +++ b/deps/lightrec/lightrec.h @@ -28,6 +28,21 @@ extern "C" { # define __api #endif +#ifndef __cnst +# ifdef __GNUC__ +# define __cnst __attribute__((const)) +# else +# define __cnst +# endif +#endif +#ifndef __pure +# ifdef __GNUC__ +# define __pure __attribute__((pure)) +# else +# define __pure +# endif +#endif + typedef uint64_t u64; typedef uint32_t u32; typedef uint16_t u16; @@ -119,7 +134,8 @@ __api void lightrec_set_invalidate_mode(struct lightrec_state *state, __api void lightrec_set_exit_flags(struct lightrec_state *state, u32 flags); __api u32 lightrec_exit_flags(struct lightrec_state 
*state); -__api struct lightrec_registers * lightrec_get_registers(struct lightrec_state *state); +__api __cnst struct lightrec_registers * +lightrec_get_registers(struct lightrec_state *state); __api u32 lightrec_current_cycle_count(const struct lightrec_state *state); __api void lightrec_reset_cycle_count(struct lightrec_state *state, u32 cycles); diff --git a/deps/lightrec/memmanager.c b/deps/lightrec/memmanager.c index c7502cdb..2934d4c7 100644 --- a/deps/lightrec/memmanager.c +++ b/deps/lightrec/memmanager.c @@ -9,7 +9,7 @@ #include -#ifdef ENABLE_THREADED_COMPILER +#if ENABLE_THREADED_COMPILER #include static atomic_uint lightrec_bytes[MEM_TYPE_END]; diff --git a/deps/lightrec/optimizer.c b/deps/lightrec/optimizer.c index 04d9d809..5ce58ada 100644 --- a/deps/lightrec/optimizer.c +++ b/deps/lightrec/optimizer.c @@ -115,6 +115,8 @@ static u64 opcode_read_mask(union code op) case OP_SW: case OP_SWR: return BIT(op.i.rs) | BIT(op.i.rt); + case OP_META: + return BIT(op.m.rs); default: return BIT(op.i.rs); } @@ -139,12 +141,14 @@ static u64 mult_div_write_mask(union code op) return flags; } -static u64 opcode_write_mask(union code op) +u64 opcode_write_mask(union code op) { switch (op.i.op) { case OP_META_MULT2: case OP_META_MULTU2: return mult_div_write_mask(op); + case OP_META: + return BIT(op.m.rd); case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JR: @@ -182,8 +186,6 @@ static u64 opcode_write_mask(union code op) case OP_LBU: case OP_LHU: case OP_LWR: - case OP_META_EXTC: - case OP_META_EXTS: return BIT(op.i.rt); case OP_JAL: return BIT(31); @@ -214,8 +216,6 @@ static u64 opcode_write_mask(union code op) default: return 0; } - case OP_META_MOV: - return BIT(op.r.rd); default: return 0; } @@ -339,7 +339,39 @@ static bool reg_is_read_or_written(const struct opcode *list, return reg_is_read(list, a, b, reg) || reg_is_written(list, a, b, reg); } -static bool opcode_is_load(union code op) +bool opcode_is_mfc(union code op) +{ + switch (op.i.op) { + case OP_CP0: + 
switch (op.r.rs) { + case OP_CP0_MFC0: + case OP_CP0_CFC0: + return true; + default: + break; + } + + break; + case OP_CP2: + if (op.r.op == OP_CP2_BASIC) { + switch (op.r.rs) { + case OP_CP2_BASIC_MFC2: + case OP_CP2_BASIC_CFC2: + return true; + default: + break; + } + } + + break; + default: + break; + } + + return false; +} + +bool opcode_is_load(union code op) { switch (op.i.op) { case OP_LB: @@ -456,46 +488,6 @@ static bool is_nop(union code op) } } -bool load_in_delay_slot(union code op) -{ - switch (op.i.op) { - case OP_CP0: - switch (op.r.rs) { - case OP_CP0_MFC0: - case OP_CP0_CFC0: - return true; - default: - break; - } - - break; - case OP_CP2: - if (op.r.op == OP_CP2_BASIC) { - switch (op.r.rs) { - case OP_CP2_BASIC_MFC2: - case OP_CP2_BASIC_CFC2: - return true; - default: - break; - } - } - - break; - case OP_LB: - case OP_LH: - case OP_LW: - case OP_LWL: - case OP_LWR: - case OP_LBU: - case OP_LHU: - return true; - default: - break; - } - - return false; -} - static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset, struct constprop_data *v) { @@ -592,9 +584,10 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset, ldop->i.rt = next->r.rd; to_change->opcode = 0; } else { - to_change->i.op = OP_META_MOV; - to_change->r.rd = next->r.rd; - to_change->r.rs = ldop->i.rt; + to_change->i.op = OP_META; + to_change->m.op = OP_META_MOV; + to_change->m.rd = next->r.rd; + to_change->m.rs = ldop->i.rt; } if (to_nop->r.imm == 24) @@ -611,18 +604,9 @@ static void lightrec_optimize_sll_sra(struct opcode *list, unsigned int offset, pr_debug("Convert SLL/SRA #%u to EXT%c\n", curr->r.imm, curr->r.imm == 24 ? 
'C' : 'S'); - if (to_change == curr) { - to_change->i.rs = curr->r.rt; - to_change->i.rt = next->r.rd; - } else { - to_change->i.rt = next->r.rd; - to_change->i.rs = curr->r.rt; - } - - if (to_nop->r.imm == 24) - to_change->i.op = OP_META_EXTC; - else - to_change->i.op = OP_META_EXTS; + to_change->m.rs = curr->r.rt; + to_change->m.op = to_nop->r.imm == 24 ? OP_META_EXTC : OP_META_EXTS; + to_change->i.op = OP_META; } to_nop->opcode = 0; @@ -678,6 +662,12 @@ static void lightrec_modify_lui(struct block *block, unsigned int offset) break; if (opcode_writes_register(c, lui->i.rt)) { + if (c.i.op == OP_LWL || c.i.op == OP_LWR) { + /* LWL/LWR only partially write their target register; + * therefore the LUI should not write a different value. */ + break; + } + pr_debug("Convert LUI at offset 0x%x to kuseg\n", i - 1 << 2); lui->i.imm = kunseg(lui->i.imm << 16) >> 16; @@ -796,13 +786,11 @@ static void lightrec_patch_known_zero(struct opcode *op, case OP_ANDI: case OP_ORI: case OP_XORI: - case OP_META_MOV: - case OP_META_EXTC: - case OP_META_EXTS: case OP_META_MULT2: case OP_META_MULTU2: - if (is_known_zero(v, op->i.rs)) - op->i.rs = 0; + case OP_META: + if (is_known_zero(v, op->m.rs)) + op->m.rs = 0; break; case OP_SB: case OP_SH: @@ -842,9 +830,14 @@ static void lightrec_reset_syncs(struct block *block) for (i = 0; i < block->nb_ops; i++) { op = &list[i]; - if (op_flag_local_branch(op->flags) && has_delay_slot(op->c)) { - offset = i + 1 + (s16)op->i.imm; - list[offset].flags |= LIGHTREC_SYNC; + if (has_delay_slot(op->c)) { + if (op_flag_local_branch(op->flags)) { + offset = i + 1 - op_flag_no_ds(op->flags) + (s16)op->i.imm; + list[offset].flags |= LIGHTREC_SYNC; + } + + if (op_flag_emulate_branch(op->flags) && i + 2 < block->nb_ops) + list[i + 2].flags |= LIGHTREC_SYNC; } } } @@ -860,7 +853,7 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl for (i = 0; i < block->nb_ops; i++) { op = &list[i]; - lightrec_consts_propagate(list, i, v); + 
lightrec_consts_propagate(block, i, v); lightrec_patch_known_zero(op, v); @@ -963,8 +956,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl case OP_ADDIU: if (op->i.imm == 0) { pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n"); - op->i.op = OP_META_MOV; - op->r.rd = op->i.rt; + op->m.rd = op->i.rt; + op->m.op = OP_META_MOV; + op->i.op = OP_META; } break; case OP_ANDI: @@ -974,8 +968,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl if (op->i.rs == op->i.rt) { op->opcode = 0; } else { - op->i.op = OP_META_MOV; - op->r.rd = op->i.rt; + op->m.rd = op->i.rt; + op->m.op = OP_META_MOV; + op->i.op = OP_META; } } break; @@ -1023,8 +1018,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl case OP_SPECIAL_SRA: if (op->r.imm == 0) { pr_debug("Convert SRA #0 to MOV\n"); - op->i.op = OP_META_MOV; - op->r.rs = op->r.rt; + op->m.rs = op->r.rt; + op->m.op = OP_META_MOV; + op->i.op = OP_META; break; } break; @@ -1041,8 +1037,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl case OP_SPECIAL_SLL: if (op->r.imm == 0) { pr_debug("Convert SLL #0 to MOV\n"); - op->i.op = OP_META_MOV; - op->r.rs = op->r.rt; + op->m.rs = op->r.rt; + op->m.op = OP_META_MOV; + op->i.op = OP_META; } lightrec_optimize_sll_sra(block->opcode_list, i, v); @@ -1060,8 +1057,9 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl case OP_SPECIAL_SRL: if (op->r.imm == 0) { pr_debug("Convert SRL #0 to MOV\n"); - op->i.op = OP_META_MOV; - op->r.rs = op->r.rt; + op->m.rs = op->r.rt; + op->m.op = OP_META_MOV; + op->i.op = OP_META; } break; @@ -1087,20 +1085,31 @@ static int lightrec_transform_ops(struct lightrec_state *state, struct block *bl op->r.op = ctz32(v[op->r.rt].value); break; + case OP_SPECIAL_NOR: + if (op->r.rs == 0 || op->r.rt == 0) { + pr_debug("Convert NOR $zero to COM\n"); + op->i.op = OP_META; + op->m.op = OP_META_COM; + if (!op->m.rs) + 
op->m.rs = op->r.rt; + } + break; case OP_SPECIAL_OR: case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: if (op->r.rs == 0) { pr_debug("Convert OR/ADD $zero to MOV\n"); - op->i.op = OP_META_MOV; - op->r.rs = op->r.rt; + op->m.rs = op->r.rt; + op->m.op = OP_META_MOV; + op->i.op = OP_META; } fallthrough; case OP_SPECIAL_SUB: case OP_SPECIAL_SUBU: if (op->r.rt == 0) { pr_debug("Convert OR/ADD/SUB $zero to MOV\n"); - op->i.op = OP_META_MOV; + op->m.op = OP_META_MOV; + op->i.op = OP_META; } fallthrough; default: @@ -1197,6 +1206,9 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc if (op_flag_sync(next->flags)) continue; + if (op_flag_load_delay(next->flags) && opcode_is_load(next_op)) + continue; + if (!lightrec_can_switch_delay_slot(list->c, next_op)) continue; @@ -1214,52 +1226,20 @@ static int lightrec_switch_delay_slots(struct lightrec_state *state, struct bloc return 0; } -static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size) -{ - struct opcode_list *list, *old_list; - - if (new_size >= block->nb_ops) { - pr_err("Invalid shrink size (%u vs %u)\n", - new_size, block->nb_ops); - return -EINVAL; - } - - list = lightrec_malloc(state, MEM_FOR_IR, - sizeof(*list) + sizeof(struct opcode) * new_size); - if (!list) { - pr_err("Unable to allocate memory\n"); - return -ENOMEM; - } - - old_list = container_of(block->opcode_list, struct opcode_list, ops); - memcpy(list->ops, old_list->ops, sizeof(struct opcode) * new_size); - - lightrec_free_opcode_list(state, block->opcode_list); - list->nb_ops = new_size; - block->nb_ops = new_size; - block->opcode_list = list->ops; - - pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n", - block->pc, new_size); - - return 0; -} - static int lightrec_detect_impossible_branches(struct lightrec_state *state, struct block *block) { struct opcode *op, *list = block->opcode_list, *next = &list[0]; unsigned int i; int ret = 0; - s16 offset; for (i = 0; i < block->nb_ops 
- 1; i++) { op = next; next = &list[i + 1]; if (!has_delay_slot(op->c) || - (!load_in_delay_slot(next->c) && - !has_delay_slot(next->c) && + (!has_delay_slot(next->c) && + !opcode_is_mfc(next->c) && !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE))) continue; @@ -1270,40 +1250,120 @@ static int lightrec_detect_impossible_branches(struct lightrec_state *state, continue; } - offset = i + 1 + (s16)op->i.imm; - if (load_in_delay_slot(next->c) && - (offset >= 0 && offset < block->nb_ops) && - !opcode_reads_register(list[offset].c, next->c.i.rt)) { - /* The 'impossible' branch is a local branch - we can - * verify here that the first opcode of the target does - * not use the target register of the delay slot */ - - pr_debug("Branch at offset 0x%x has load delay slot, " - "but is local and dest opcode does not read " - "dest register\n", i << 2); + op->flags |= LIGHTREC_EMULATE_BRANCH; + + if (OPT_LOCAL_BRANCHES && i + 2 < block->nb_ops) { + /* The interpreter will only emulate the branch, then + * return to the compiled code. Add a SYNC after the + * branch + delay slot in the case where the branch + * was not taken. */ + list[i + 2].flags |= LIGHTREC_SYNC; + } + } + + return ret; +} + +static bool is_local_branch(const struct block *block, unsigned int idx) +{ + const struct opcode *op = &block->opcode_list[idx]; + s32 offset; + + switch (op->c.i.op) { + case OP_BEQ: + case OP_BNE: + case OP_BLEZ: + case OP_BGTZ: + case OP_REGIMM: + offset = idx + 1 + (s16)op->c.i.imm; + if (offset >= 0 && offset < block->nb_ops) + return true; + fallthrough; + default: + return false; + } +} + +static int lightrec_handle_load_delays(struct lightrec_state *state, + struct block *block) +{ + struct opcode *op, *list = block->opcode_list; + unsigned int i; + s16 imm; + + for (i = 0; i < block->nb_ops; i++) { + op = &list[i]; + + if (!opcode_is_load(op->c) || !op->c.i.rt || op->c.i.op == OP_LWC2) + continue; + + if (!is_delay_slot(list, i)) { + /* Only handle load delays in delay slots. 
+ * PSX games never abused load delay slots otherwise. */ continue; } - op->flags |= LIGHTREC_EMULATE_BRANCH; + if (is_local_branch(block, i - 1)) { + imm = (s16)list[i - 1].c.i.imm; - if (op == list) { - pr_debug("First opcode of block PC 0x%08x is an impossible branch\n", - block->pc); + if (!opcode_reads_register(list[i + imm].c, op->c.i.rt)) { + /* The target opcode of the branch is inside + * the block, and it does not read the register + * written to by the load opcode; we can ignore + * the load delay. */ + continue; + } + } - /* If the first opcode is an 'impossible' branch, we - * only keep the first two opcodes of the block (the - * branch itself + its delay slot) */ - if (block->nb_ops > 2) - ret = shrink_opcode_list(state, block, 2); - break; + op->flags |= LIGHTREC_LOAD_DELAY; + } + + return 0; +} + +static int lightrec_swap_load_delays(struct lightrec_state *state, + struct block *block) +{ + unsigned int i; + union code c, next; + bool in_ds = false, skip_next = false; + struct opcode op; + + if (block->nb_ops < 2) + return 0; + + for (i = 0; i < block->nb_ops - 2; i++) { + c = block->opcode_list[i].c; + + if (skip_next) { + skip_next = false; + } else if (!in_ds && opcode_is_load(c) && c.i.op != OP_LWC2) { + next = block->opcode_list[i + 1].c; + + if (c.i.op == OP_LWL && next.i.op == OP_LWR) + continue; + + if (opcode_reads_register(next, c.i.rt) + && !opcode_writes_register(next, c.i.rs)) { + pr_debug("Swapping opcodes at offset 0x%x to " + "respect load delay\n", i << 2); + + op = block->opcode_list[i]; + block->opcode_list[i] = block->opcode_list[i + 1]; + block->opcode_list[i + 1] = op; + skip_next = true; + } } + + in_ds = has_delay_slot(c); } - return ret; + return 0; } static int lightrec_local_branches(struct lightrec_state *state, struct block *block) { + const struct opcode *ds; struct opcode *list; unsigned int i; s32 offset; @@ -1311,25 +1371,19 @@ static int lightrec_local_branches(struct lightrec_state *state, struct block *b for (i = 
0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; - if (should_emulate(list)) + if (should_emulate(list) || !is_local_branch(block, i)) continue; - switch (list->i.op) { - case OP_BEQ: - case OP_BNE: - case OP_BLEZ: - case OP_BGTZ: - case OP_REGIMM: - offset = i + 1 + (s16)list->i.imm; - if (offset >= 0 && offset < block->nb_ops) - break; - fallthrough; - default: - continue; - } + offset = i + 1 + (s16)list->c.i.imm; pr_debug("Found local branch to offset 0x%x\n", offset << 2); + ds = get_delay_slot(block->opcode_list, i); + if (op_flag_load_delay(ds->flags) && opcode_is_load(ds->c)) { + pr_debug("Branch delay slot has a load delay - skip\n"); + continue; + } + if (should_emulate(&block->opcode_list[offset])) { pr_debug("Branch target must be emulated - skip\n"); continue; @@ -1388,7 +1442,7 @@ static bool op_writes_rd(union code c) { switch (c.i.op) { case OP_SPECIAL: - case OP_META_MOV: + case OP_META: return true; default: return false; @@ -1447,7 +1501,7 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo struct opcode *op; s16 last_r[34], last_w[34], last_sync = 0, next_sync = 0; u64 mask_r, mask_w, dirty = 0, loaded = 0; - u8 reg; + u8 reg, load_delay_reg = 0; memset(last_r, 0xff, sizeof(last_r)); memset(last_w, 0xff, sizeof(last_w)); @@ -1468,6 +1522,13 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; + if (OPT_HANDLE_LOAD_DELAYS && load_delay_reg) { + /* Handle delayed register write from load opcodes in + * delay slots */ + last_w[load_delay_reg] = i; + load_delay_reg = 0; + } + if (op_flag_sync(op->flags) || should_emulate(op)) { /* The next opcode has the SYNC flag set, or is a branch * that should be emulated: unload all registers. 
*/ @@ -1489,6 +1550,15 @@ static int lightrec_early_unload(struct lightrec_state *state, struct block *blo mask_r = opcode_read_mask(op->c); mask_w = opcode_write_mask(op->c); + if (op_flag_load_delay(op->flags) && opcode_is_load(op->c)) { + /* If we have a load opcode in a delay slot, its target + * register is actually not written there but at a + * later point, in the dispatcher. Prevent the algorithm + * from discarding its previous value. */ + load_delay_reg = op->c.i.rt; + mask_w &= ~BIT(op->c.i.rt); + } + for (reg = 0; reg < 34; reg++) { if (mask_r & BIT(reg)) { if (dirty & BIT(reg) && last_w[reg] < last_sync) { @@ -1553,37 +1623,32 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) for (i = 0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; - lightrec_consts_propagate(block->opcode_list, i, v); + lightrec_consts_propagate(block, i, v); switch (list->i.op) { case OP_SB: case OP_SH: case OP_SW: - if (OPT_FLAG_STORES) { - /* Mark all store operations that target $sp or $gp - * as not requiring code invalidation. This is based - * on the heuristic that stores using one of these - * registers as address will never hit a code page. */ - if (list->i.rs >= 28 && list->i.rs <= 29 && - !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { - pr_debug("Flaging opcode 0x%08x as not " - "requiring invalidation\n", - list->opcode); - list->flags |= LIGHTREC_NO_INVALIDATE; - list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); - } + /* Mark all store operations that target $sp or $gp + * as not requiring code invalidation. This is based + * on the heuristic that stores using one of these + * registers as address will never hit a code page. 
*/ + if (list->i.rs >= 28 && list->i.rs <= 29 && + !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { + pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n", + list->opcode); + list->flags |= LIGHTREC_NO_INVALIDATE; + } - /* Detect writes whose destination address is inside the - * current block, using constant propagation. When these - * occur, we mark the blocks as not compilable. */ - if (is_known(v, list->i.rs) && - kunseg(v[list->i.rs].value) >= kunseg(block->pc) && - kunseg(v[list->i.rs].value) < (kunseg(block->pc) + - block->nb_ops * 4)) { - pr_debug("Self-modifying block detected\n"); - block_set_flags(block, BLOCK_NEVER_COMPILE); - list->flags |= LIGHTREC_SMC; - } + /* Detect writes whose destination address is inside the + * current block, using constant propagation. When these + * occur, we mark the blocks as not compilable. */ + if (is_known(v, list->i.rs) && + kunseg(v[list->i.rs].value) >= kunseg(block->pc) && + kunseg(v[list->i.rs].value) < (kunseg(block->pc) + block->nb_ops * 4)) { + pr_debug("Self-modifying block detected\n"); + block_set_flags(block, BLOCK_NEVER_COMPILE); + list->flags |= LIGHTREC_SMC; } fallthrough; case OP_SWL: @@ -1597,8 +1662,7 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) case OP_LWL: case OP_LWR: case OP_LWC2: - if (OPT_FLAG_IO && - (v[list->i.rs].known | v[list->i.rs].sign)) { + if (v[list->i.rs].known | v[list->i.rs].sign) { psx_map = lightrec_get_constprop_map(state, v, list->i.rs, (s16) list->i.imm); @@ -1664,6 +1728,16 @@ static int lightrec_flag_io(struct lightrec_state *state, struct block *block) break; } } + + if (!LIGHTREC_FLAGS_GET_IO_MODE(list->flags) + && list->i.rs >= 28 && list->i.rs <= 29 + && !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { + /* Assume that all I/O operations that target + * $sp or $gp will always only target a mapped + * memory (RAM, BIOS, scratchpad). 
*/ + list->flags |= LIGHTREC_IO_MODE(LIGHTREC_IO_DIRECT); + } + fallthrough; default: break; @@ -1862,7 +1936,7 @@ static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block * for (i = 0; i < block->nb_ops - 1; i++) { list = &block->opcode_list[i]; - lightrec_consts_propagate(block->opcode_list, i, v); + lightrec_consts_propagate(block, i, v); switch (list->i.op) { case OP_SPECIAL: @@ -2079,11 +2153,13 @@ static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block * IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches), + IF_OPT(OPT_HANDLE_LOAD_DELAYS, &lightrec_handle_load_delays), + IF_OPT(OPT_HANDLE_LOAD_DELAYS, &lightrec_swap_load_delays), IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_branches), IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches), IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots), - IF_OPT(OPT_FLAG_IO || OPT_FLAG_STORES, &lightrec_flag_io), + IF_OPT(OPT_FLAG_IO, &lightrec_flag_io), IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs), IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload), }; diff --git a/deps/lightrec/optimizer.h b/deps/lightrec/optimizer.h index 825042df..f2b1f30f 100644 --- a/deps/lightrec/optimizer.h +++ b/deps/lightrec/optimizer.h @@ -11,14 +11,16 @@ struct block; struct opcode; -_Bool opcode_reads_register(union code op, u8 reg); -_Bool opcode_writes_register(union code op, u8 reg); -_Bool has_delay_slot(union code op); +__cnst _Bool opcode_reads_register(union code op, u8 reg); +__cnst _Bool opcode_writes_register(union code op, u8 reg); +__cnst u64 opcode_write_mask(union code op); +__cnst _Bool has_delay_slot(union code op); _Bool is_delay_slot(const struct opcode *list, unsigned int offset); -_Bool load_in_delay_slot(union code op); -_Bool opcode_is_io(union code op); 
-_Bool is_unconditional_jump(union code c); -_Bool is_syscall(union code c); +__cnst _Bool opcode_is_mfc(union code op); +__cnst _Bool opcode_is_load(union code op); +__cnst _Bool opcode_is_io(union code op); +__cnst _Bool is_unconditional_jump(union code c); +__cnst _Bool is_syscall(union code c); _Bool should_emulate(const struct opcode *op); diff --git a/deps/lightrec/regcache.c b/deps/lightrec/regcache.c index c62ba3d5..2a7ffe92 100644 --- a/deps/lightrec/regcache.c +++ b/deps/lightrec/regcache.c @@ -49,6 +49,10 @@ static const char * mips_regs[] = { "lo", "hi", }; +/* Forward declaration(s) */ +static void clean_reg(jit_state_t *_jit, + struct native_register *nreg, u8 jit_reg, bool clean); + const char * lightrec_reg_name(u8 reg) { return mips_regs[reg]; @@ -219,14 +223,7 @@ static void lightrec_discard_nreg(struct native_register *nreg) static void lightrec_unload_nreg(struct regcache *cache, jit_state_t *_jit, struct native_register *nreg, u8 jit_reg) { - /* If we get a dirty register, store back the old value */ - if (nreg->prio == REG_IS_DIRTY) { - s16 offset = offsetof(struct lightrec_state, regs.gpr) - + (nreg->emulated_register << 2); - - jit_stxi_i(offset, LIGHTREC_REG_STATE, jit_reg); - } - + clean_reg(_jit, nreg, jit_reg, false); lightrec_discard_nreg(nreg); } @@ -519,6 +516,7 @@ void lightrec_free_regs(struct regcache *cache) static void clean_reg(jit_state_t *_jit, struct native_register *nreg, u8 jit_reg, bool clean) { + /* If we get a dirty register, store back the old value */ if (nreg->prio == REG_IS_DIRTY) { s16 offset = offsetof(struct lightrec_state, regs.gpr) + (nreg->emulated_register << 2); @@ -579,6 +577,11 @@ void lightrec_clean_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg) } } +bool lightrec_reg_is_loaded(struct regcache *cache, u16 reg) +{ + return !!find_mapped_reg(cache, reg, false); +} + void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, u16 reg, bool unload) { diff --git 
a/deps/lightrec/regcache.h b/deps/lightrec/regcache.h index d242c54b..55f1cfd9 100644 --- a/deps/lightrec/regcache.h +++ b/deps/lightrec/regcache.h @@ -8,8 +8,13 @@ #include "lightning-wrapper.h" -#define NUM_REGS (JIT_V_NUM - 1) -#define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) +#if defined(__sh__) +# define NUM_REGS JIT_V_NUM +# define LIGHTREC_REG_STATE _GBR +#else +# define NUM_REGS (JIT_V_NUM - 1) +# define LIGHTREC_REG_STATE (JIT_V(JIT_V_NUM - 1)) +#endif #if defined(__powerpc__) # define NUM_TEMPS JIT_R_NUM @@ -68,6 +73,7 @@ void lightrec_unload_reg(struct regcache *cache, jit_state_t *_jit, u8 jit_reg); void lightrec_storeback_regs(struct regcache *cache, jit_state_t *_jit); _Bool lightrec_has_dirty_regs(struct regcache *cache); +_Bool lightrec_reg_is_loaded(struct regcache *cache, u16 reg); void lightrec_clean_reg_if_loaded(struct regcache *cache, jit_state_t *_jit, u16 reg, _Bool unload); void lightrec_discard_reg_if_loaded(struct regcache *cache, u16 reg); @@ -82,7 +88,7 @@ void lightrec_regcache_leave_branch(struct regcache *cache, struct regcache * lightrec_regcache_init(struct lightrec_state *state); void lightrec_free_regcache(struct regcache *cache); -const char * lightrec_reg_name(u8 reg); +__cnst const char * lightrec_reg_name(u8 reg); void lightrec_regcache_mark_live(struct regcache *cache, jit_state_t *_jit); diff --git a/include/lightning/lightning.h b/include/lightning/lightning.h index 23015a44..9c99ad92 100644 --- a/include/lightning/lightning.h +++ b/include/lightning/lightning.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012-2019 Free Software Foundation, Inc. + * Copyright (C) 2012-2023 Free Software Foundation, Inc. * * This file is part of GNU lightning. 
* @@ -151,6 +151,8 @@ typedef jit_int32_t jit_fpr_t; # include #elif defined(__riscv) # include +#elif defined(__loongarch__) +# include #endif #define jit_flag_node 0x0001 /* patch node not absolute */ @@ -188,6 +190,8 @@ typedef enum { #define jit_align(u) jit_new_node_w(jit_code_align, u) jit_code_live, jit_code_align, jit_code_save, jit_code_load, +#define jit_skip(u) jit_new_node_w(jit_code_skip, u) + jit_code_skip, #define jit_name(u) _jit_name(_jit,u) jit_code_name, #define jit_note(u, v) _jit_note(_jit, u, v) @@ -208,27 +212,80 @@ typedef enum { #define jit_allocar(u, v) _jit_allocar(_jit,u,v) jit_code_allocai, jit_code_allocar, -#define jit_arg() _jit_arg(_jit) - jit_code_arg, +#define jit_arg_c() _jit_arg(_jit, jit_code_arg_c) +#define jit_arg_s() _jit_arg(_jit, jit_code_arg_s) +#define jit_arg_i() _jit_arg(_jit, jit_code_arg_i) +# if __WORDSIZE == 32 +# define jit_arg() jit_arg_i() +#else +# define jit_arg_l() _jit_arg(_jit, jit_code_arg_l) +# define jit_arg() jit_arg_l() +#endif + jit_code_arg_c, jit_code_arg_s, + jit_code_arg_i, jit_code_arg_l, +#if __WORDSIZE == 32 +# define jit_code_arg jit_code_arg_i +#else +# define jit_code_arg jit_code_arg_l +#endif + #define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v) #define jit_getarg_uc(u,v) _jit_getarg_uc(_jit,u,v) - jit_code_getarg_c, jit_code_getarg_uc, #define jit_getarg_s(u,v) _jit_getarg_s(_jit,u,v) #define jit_getarg_us(u,v) _jit_getarg_us(_jit,u,v) - jit_code_getarg_s, jit_code_getarg_us, #define jit_getarg_i(u,v) _jit_getarg_i(_jit,u,v) #if __WORDSIZE == 32 # define jit_getarg(u,v) jit_getarg_i(u,v) #else -# define jit_getarg(u,v) jit_getarg_l(u,v) # define jit_getarg_ui(u,v) _jit_getarg_ui(_jit,u,v) # define jit_getarg_l(u,v) _jit_getarg_l(_jit,u,v) +# define jit_getarg(u,v) jit_getarg_l(u,v) #endif + jit_code_getarg_c, jit_code_getarg_uc, + jit_code_getarg_s, jit_code_getarg_us, jit_code_getarg_i, jit_code_getarg_ui, jit_code_getarg_l, -# define jit_putargr(u,v) _jit_putargr(_jit,u,v) -# define 
jit_putargi(u,v) _jit_putargi(_jit,u,v) - jit_code_putargr, jit_code_putargi, +#if __WORDSIZE == 32 +# define jit_code_getarg jit_code_getarg_i +#else +# define jit_code_getarg jit_code_getarg_l +#endif + +#define jit_putargr_c(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_c) +#define jit_putargi_c(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_c) +#define jit_putargr_uc(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_uc) +#define jit_putargi_uc(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_uc) +#define jit_putargr_s(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_s) +#define jit_putargi_s(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_s) +#define jit_putargr_us(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_us) +#define jit_putargi_us(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_us) +#define jit_putargr_i(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_i) +#define jit_putargi_i(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_i) +#if __WORDSIZE == 32 +# define jit_putargr(u,v) jit_putargr_i(u,v) +# define jit_putargi(u,v) jit_putargi_i(u,v) +#else +# define jit_putargr_ui(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_ui) +# define jit_putargi_ui(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_ui) +# define jit_putargr_l(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_l) +# define jit_putargi_l(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_l) +# define jit_putargr(u,v) jit_putargr_l(u,v) +# define jit_putargi(u,v) jit_putargi_l(u,v) +#endif + jit_code_putargr_c, jit_code_putargi_c, + jit_code_putargr_uc, jit_code_putargi_uc, + jit_code_putargr_s, jit_code_putargi_s, + jit_code_putargr_us, jit_code_putargi_us, + jit_code_putargr_i, jit_code_putargi_i, + jit_code_putargr_ui, jit_code_putargi_ui, + jit_code_putargr_l, jit_code_putargi_l, +#if __WORDSIZE == 32 +# define jit_code_putargr jit_code_putargr_i +# define jit_code_putargi jit_code_putargi_i +#else +# define jit_code_putargr jit_code_putargr_l +# define jit_code_putargi jit_code_putargi_l +#endif #define jit_va_start(u) 
jit_new_node_w(jit_code_va_start, u) jit_code_va_start, @@ -350,6 +407,10 @@ typedef enum { #define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w) jit_code_movnr, jit_code_movzr, + jit_code_casr, jit_code_casi, +#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) +#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) + #define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v) #define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v) jit_code_extr_c, jit_code_extr_uc, @@ -362,6 +423,18 @@ typedef enum { #endif jit_code_extr_i, jit_code_extr_ui, +#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v) + jit_code_bswapr_us, +#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) + jit_code_bswapr_ui, +#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) + jit_code_bswapr_ul, +#if __WORDSIZE == 32 +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) +#else +#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) +#endif + #define jit_htonr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) #define jit_ntohr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v) jit_code_htonr_us, @@ -548,33 +621,106 @@ typedef enum { #define jit_prepare() _jit_prepare(_jit) jit_code_prepare, -#define jit_pushargr(u) _jit_pushargr(_jit,u) -#define jit_pushargi(u) _jit_pushargi(_jit,u) - jit_code_pushargr, jit_code_pushargi, + +#define jit_pushargr_c(u) _jit_pushargr(_jit,u,jit_code_pushargr_c) +#define jit_pushargi_c(u) _jit_pushargi(_jit,u,jit_code_pushargi_c) +#define jit_pushargr_uc(u) _jit_pushargr(_jit,u,jit_code_pushargr_uc) +#define jit_pushargi_uc(u) _jit_pushargi(_jit,u,jit_code_pushargi_uc) +#define jit_pushargr_s(u) _jit_pushargr(_jit,u,jit_code_pushargr_s) +#define jit_pushargi_s(u) _jit_pushargi(_jit,u,jit_code_pushargi_s) +#define jit_pushargr_us(u) _jit_pushargr(_jit,u,jit_code_pushargr_us) +#define jit_pushargi_us(u) _jit_pushargi(_jit,u,jit_code_pushargi_us) +#define 
jit_pushargr_i(u) _jit_pushargr(_jit,u,jit_code_pushargr_i) +#define jit_pushargi_i(u) _jit_pushargi(_jit,u,jit_code_pushargi_i) +#if __WORDSIZE == 32 +# define jit_pushargr(u) jit_pushargr_i(u) +# define jit_pushargi(u) jit_pushargi_i(u) +#else +# define jit_pushargr_ui(u) _jit_pushargr(_jit,u,jit_code_pushargr_ui) +# define jit_pushargi_ui(u) _jit_pushargi(_jit,u,jit_code_pushargi_ui) +# define jit_pushargr_l(u) _jit_pushargr(_jit,u,jit_code_pushargr_l) +# define jit_pushargi_l(u) _jit_pushargi(_jit,u,jit_code_pushargi_l) +# define jit_pushargr(u) jit_pushargr_l(u) +# define jit_pushargi(u) jit_pushargi_l(u) +#endif + jit_code_pushargr_c, jit_code_pushargi_c, + jit_code_pushargr_uc, jit_code_pushargi_uc, + jit_code_pushargr_s, jit_code_pushargi_s, + jit_code_pushargr_us, jit_code_pushargi_us, + jit_code_pushargr_i, jit_code_pushargi_i, + jit_code_pushargr_ui, jit_code_pushargi_ui, + jit_code_pushargr_l, jit_code_pushargi_l, +#if __WORDSIZE == 32 +# define jit_code_pushargr jit_code_pushargr_i +# define jit_code_pushargi jit_code_pushargi_i +#else +# define jit_code_pushargr jit_code_pushargr_l +# define jit_code_pushargi jit_code_pushargi_l +#endif + #define jit_finishr(u) _jit_finishr(_jit,u) #define jit_finishi(u) _jit_finishi(_jit,u) jit_code_finishr, jit_code_finishi, #define jit_ret() _jit_ret(_jit) jit_code_ret, -#define jit_retr(u) _jit_retr(_jit,u) -#define jit_reti(u) _jit_reti(_jit,u) - jit_code_retr, jit_code_reti, + +#define jit_retr_c(u) _jit_retr(_jit,u,jit_code_retr_c) +#define jit_reti_c(u) _jit_reti(_jit,u,jit_code_reti_c) +#define jit_retr_uc(u) _jit_retr(_jit,u,jit_code_retr_uc) +#define jit_reti_uc(u) _jit_reti(_jit,u,jit_code_reti_uc) +#define jit_retr_s(u) _jit_retr(_jit,u,jit_code_retr_s) +#define jit_reti_s(u) _jit_reti(_jit,u,jit_code_reti_s) +#define jit_retr_us(u) _jit_retr(_jit,u,jit_code_retr_us) +#define jit_reti_us(u) _jit_reti(_jit,u,jit_code_reti_us) +#define jit_retr_i(u) _jit_retr(_jit,u,jit_code_retr_i) +#define jit_reti_i(u) 
_jit_reti(_jit,u,jit_code_reti_i) +#if __WORDSIZE == 32 +# define jit_retr(u) jit_retr_i(u) +# define jit_reti(u) jit_reti_i(u) +#else +# define jit_retr_ui(u) _jit_retr(_jit,u,jit_code_retr_ui) +# define jit_reti_ui(u) _jit_reti(_jit,u,jit_code_reti_ui) +# define jit_retr_l(u) _jit_retr(_jit,u,jit_code_retr_l) +# define jit_reti_l(u) _jit_reti(_jit,u,jit_code_reti_l) +# define jit_retr(u) jit_retr_l(u) +# define jit_reti(u) jit_reti_l(u) +#endif + jit_code_retr_c, jit_code_reti_c, + jit_code_retr_uc, jit_code_reti_uc, + jit_code_retr_s, jit_code_reti_s, + jit_code_retr_us, jit_code_reti_us, + jit_code_retr_i, jit_code_reti_i, + jit_code_retr_ui, jit_code_reti_ui, + jit_code_retr_l, jit_code_reti_l, +#if __WORDSIZE == 32 +# define jit_code_retr jit_code_retr_i +# define jit_code_reti jit_code_reti_i +#else +# define jit_code_retr jit_code_retr_l +# define jit_code_reti jit_code_reti_l +#endif + #define jit_retval_c(u) _jit_retval_c(_jit,u) #define jit_retval_uc(u) _jit_retval_uc(_jit,u) - jit_code_retval_c, jit_code_retval_uc, #define jit_retval_s(u) _jit_retval_s(_jit,u) #define jit_retval_us(u) _jit_retval_us(_jit,u) - jit_code_retval_s, jit_code_retval_us, #define jit_retval_i(u) _jit_retval_i(_jit,u) #if __WORDSIZE == 32 # define jit_retval(u) jit_retval_i(u) #else -# define jit_retval(u) jit_retval_l(u) # define jit_retval_ui(u) _jit_retval_ui(_jit,u) # define jit_retval_l(u) _jit_retval_l(_jit,u) +# define jit_retval(u) jit_retval_l(u) #endif + jit_code_retval_c, jit_code_retval_uc, + jit_code_retval_s, jit_code_retval_us, jit_code_retval_i, jit_code_retval_ui, jit_code_retval_l, +#if __WORDSIZE == 32 +# define jit_code_retval jit_code_retval_i +#else +# define jit_code_retval jit_code_retval_l +#endif #define jit_epilog() _jit_epilog(_jit) jit_code_epilog, @@ -902,21 +1048,13 @@ typedef enum { #define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v) #define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v) -#define jit_bswapr_us(u,v) 
jit_new_node_ww(jit_code_bswapr_us,u,v) - jit_code_bswapr_us, -#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) - jit_code_bswapr_ui, -#define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) - jit_code_bswapr_ul, -#if __WORDSIZE == 32 -#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v) -#else -#define jit_bswapr(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v) -#endif +#define jit_clor(u,v) jit_new_node_ww(jit_code_clor,u,v) +#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v) + jit_code_clor, jit_code_clzr, - jit_code_casr, jit_code_casi, -#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x) -#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x) +#define jit_ctor(u,v) jit_new_node_ww(jit_code_ctor,u,v) +#define jit_ctzr(u,v) jit_new_node_ww(jit_code_ctzr,u,v) + jit_code_ctor, jit_code_ctzr, jit_code_last_code } jit_code_t; @@ -958,7 +1096,8 @@ extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t); extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t); extern void _jit_ellipsis(jit_state_t*); -extern jit_node_t *_jit_arg(jit_state_t*); +extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t); + extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*); @@ -968,19 +1107,24 @@ extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*); extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*); #endif -extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*); -extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*); + +extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t); +extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t); extern void _jit_prepare(jit_state_t*); extern void _jit_ellipsis(jit_state_t*); 
extern void _jit_va_push(jit_state_t*, jit_gpr_t); -extern void _jit_pushargr(jit_state_t*, jit_gpr_t); -extern void _jit_pushargi(jit_state_t*, jit_word_t); + +extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t); + extern void _jit_finishr(jit_state_t*, jit_gpr_t); extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t); extern void _jit_ret(jit_state_t*); -extern void _jit_retr(jit_state_t*, jit_gpr_t); -extern void _jit_reti(jit_state_t*, jit_word_t); + +extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t); +extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t); + extern void _jit_retval_c(jit_state_t*, jit_gpr_t); extern void _jit_retval_uc(jit_state_t*, jit_gpr_t); extern void _jit_retval_s(jit_state_t*, jit_gpr_t); @@ -990,6 +1134,7 @@ extern void _jit_retval_i(jit_state_t*, jit_gpr_t); extern void _jit_retval_ui(jit_state_t*, jit_gpr_t); extern void _jit_retval_l(jit_state_t*, jit_gpr_t); #endif + extern void _jit_epilog(jit_state_t*); #define jit_patch(u) _jit_patch(_jit,u) @@ -1014,6 +1159,10 @@ extern void _jit_frame(jit_state_t*, jit_int32_t); extern void _jit_tramp(jit_state_t*, jit_int32_t); #define jit_emit() _jit_emit(_jit) extern jit_pointer_t _jit_emit(jit_state_t*); +#define jit_unprotect() _jit_unprotect(_jit) +extern void _jit_unprotect(jit_state_t*); +#define jit_protect() _jit_protect(_jit) +extern void _jit_protect(jit_state_t*); #define jit_print() _jit_print(_jit) extern void _jit_print(jit_state_t*); diff --git a/include/lightrec/lightrec-config.h b/include/lightrec/lightrec-config.h index 791eedf4..79ab7a6d 100644 --- a/include/lightrec/lightrec-config.h +++ b/include/lightrec/lightrec-config.h @@ -16,10 +16,10 @@ #define OPT_REMOVE_DIV_BY_ZERO_SEQ 1 #define OPT_REPLACE_MEMSET 1 #define OPT_DETECT_IMPOSSIBLE_BRANCHES 1 +#define OPT_HANDLE_LOAD_DELAYS 1 #define OPT_TRANSFORM_OPS 1 #define OPT_LOCAL_BRANCHES 1 #define OPT_SWITCH_DELAY_SLOTS 1 
-#define OPT_FLAG_STORES 1 #define OPT_FLAG_IO 1 #define OPT_FLAG_MULT_DIV 1 #define OPT_EARLY_UNLOAD 1