git subrepo pull (merge) --force deps/lightning
authorPaul Cercueil <paul@crapouillou.net>
Fri, 24 Feb 2023 23:35:38 +0000 (23:35 +0000)
committerPaul Cercueil <paul@crapouillou.net>
Sun, 9 Jul 2023 11:55:56 +0000 (13:55 +0200)
subrepo:
  subdir:   "deps/lightning"
  merged:   "b1983e9036"
upstream:
  origin:   "https://github.com/pcercuei/gnu_lightning.git"
  branch:   "pcsx_rearmed"
  commit:   "b1983e9036"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

108 files changed:
deps/lightning/.gitignore
deps/lightning/.gitrepo
deps/lightning/ChangeLog
deps/lightning/Makefile.am
deps/lightning/THANKS
deps/lightning/TODO
deps/lightning/check/Makefile.am
deps/lightning/check/all.tst
deps/lightning/check/allocar.tst
deps/lightning/check/bit.ok [new file with mode: 0644]
deps/lightning/check/bit.tst [new file with mode: 0644]
deps/lightning/check/call.tst
deps/lightning/check/carg.c
deps/lightning/check/catomic.c
deps/lightning/check/ccall.c
deps/lightning/check/factorial.tst [new file with mode: 0644]
deps/lightning/check/fib.tst
deps/lightning/check/float.tst
deps/lightning/check/lightning.c
deps/lightning/check/protect.c [new file with mode: 0644]
deps/lightning/check/put.tst
deps/lightning/check/riprel.c [new file with mode: 0644]
deps/lightning/check/riprel.ok [new file with mode: 0644]
deps/lightning/check/setcode.c
deps/lightning/check/skip.ok [new file with mode: 0644]
deps/lightning/check/skip.tst [new file with mode: 0644]
deps/lightning/check/stack.tst
deps/lightning/configure.ac
deps/lightning/doc/Makefile.am
deps/lightning/doc/body.texi
deps/lightning/doc/rpn.c
deps/lightning/include/Makefile.am
deps/lightning/include/lightning.h.in
deps/lightning/include/lightning/jit_aarch64.h
deps/lightning/include/lightning/jit_alpha.h
deps/lightning/include/lightning/jit_arm.h
deps/lightning/include/lightning/jit_hppa.h
deps/lightning/include/lightning/jit_ia64.h
deps/lightning/include/lightning/jit_loongarch.h
deps/lightning/include/lightning/jit_mips.h
deps/lightning/include/lightning/jit_ppc.h
deps/lightning/include/lightning/jit_private.h
deps/lightning/include/lightning/jit_riscv.h
deps/lightning/include/lightning/jit_s390.h
deps/lightning/include/lightning/jit_sparc.h
deps/lightning/include/lightning/jit_x86.h
deps/lightning/lib/Makefile.am
deps/lightning/lib/aarch64-logical-immediates.c [new file with mode: 0644]
deps/lightning/lib/jit_aarch64-cpu.c
deps/lightning/lib/jit_aarch64-fpu.c
deps/lightning/lib/jit_aarch64-sz.c
deps/lightning/lib/jit_aarch64.c
deps/lightning/lib/jit_alpha-cpu.c
deps/lightning/lib/jit_alpha-fpu.c
deps/lightning/lib/jit_alpha-sz.c
deps/lightning/lib/jit_alpha.c
deps/lightning/lib/jit_arm-cpu.c
deps/lightning/lib/jit_arm-swf.c
deps/lightning/lib/jit_arm-sz.c
deps/lightning/lib/jit_arm-vfp.c
deps/lightning/lib/jit_arm.c
deps/lightning/lib/jit_disasm.c
deps/lightning/lib/jit_fallback.c
deps/lightning/lib/jit_hppa-cpu.c
deps/lightning/lib/jit_hppa-fpu.c
deps/lightning/lib/jit_hppa-sz.c
deps/lightning/lib/jit_hppa.c
deps/lightning/lib/jit_ia64-cpu.c
deps/lightning/lib/jit_ia64-fpu.c
deps/lightning/lib/jit_ia64-sz.c
deps/lightning/lib/jit_ia64.c
deps/lightning/lib/jit_loongarch-cpu.c
deps/lightning/lib/jit_loongarch-fpu.c
deps/lightning/lib/jit_loongarch-sz.c
deps/lightning/lib/jit_loongarch.c
deps/lightning/lib/jit_memory.c
deps/lightning/lib/jit_mips-cpu.c
deps/lightning/lib/jit_mips-fpu.c
deps/lightning/lib/jit_mips-sz.c
deps/lightning/lib/jit_mips.c
deps/lightning/lib/jit_names.c
deps/lightning/lib/jit_note.c
deps/lightning/lib/jit_ppc-cpu.c
deps/lightning/lib/jit_ppc-fpu.c
deps/lightning/lib/jit_ppc-sz.c
deps/lightning/lib/jit_ppc.c
deps/lightning/lib/jit_print.c
deps/lightning/lib/jit_rewind.c
deps/lightning/lib/jit_riscv-cpu.c
deps/lightning/lib/jit_riscv-fpu.c
deps/lightning/lib/jit_riscv-sz.c
deps/lightning/lib/jit_riscv.c
deps/lightning/lib/jit_s390-cpu.c
deps/lightning/lib/jit_s390-fpu.c
deps/lightning/lib/jit_s390-sz.c
deps/lightning/lib/jit_s390.c
deps/lightning/lib/jit_size.c
deps/lightning/lib/jit_sparc-cpu.c
deps/lightning/lib/jit_sparc-fpu.c
deps/lightning/lib/jit_sparc-sz.c
deps/lightning/lib/jit_sparc.c
deps/lightning/lib/jit_x86-cpu.c
deps/lightning/lib/jit_x86-sse.c
deps/lightning/lib/jit_x86-sz.c
deps/lightning/lib/jit_x86-x87.c
deps/lightning/lib/jit_x86.c
deps/lightning/lib/lightning.c
deps/lightning/size.c

index 6fc5bf9..bc7e971 100644 (file)
@@ -1,3 +1,4 @@
+/build-aux
 +*
 
 *.o
index 6cc0878..17edd68 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/gnu_lightning.git
        branch = pcsx_rearmed
-       commit = b910a469a9bea63056eb53430dea4c7b56e447a8
-       parent = 13b02197fcb7575646408094d5583ed7391b1153
+       commit = b1983e9036d35933ffa773d81b61eedbf3ae3b93
+       parent = 638335fabe3ba77b2a5c624a4c4aec52c18488f7
        method = merge
        cmdver = 0.4.3
index 40ade7a..2cd5273 100644 (file)
@@ -1,3 +1,190 @@
+2023-02-23 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h: Add new 'inst' field to
+       jit_compiler_t, if __mips__ is defined. This field is a simple
+       helper for a pending instruction to be emitted, and that can
+       be emitted out of order.
+       * lib/jit_fallback.c: Update for changes in internal mips patching
+       and jumping macros and function calls.
+       * lib/jit_mips-cpu.c: Core of changes to attempt to fill delay
+       slots with instructions that can be emitted out of order.
+       * lib/jit_mips-fpu.c: Update to use delay slot in branches.
+       * lib/jit_mips.c: Update for new delay slot use logic.
+
+2023-02-20 Paulo Andrade <pcpa@gnu.org>
+
+       * check/float.tst: Add conditionals for mips release for expected
+       NaN truncated to an integer.
+       * check/lightning.c: Add extra preprocessor for mips release.
+       * include/lightning/jit_mips.h: Make the NEW_ABI preprocessor
+       defined to zero if using the n32 or n64 abis. This makes it
+       easier to create runtime checks with an always true or false
+       condition.
+       * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c: Implement mips release
+       6 support.
+       * lib/jit_mips.c: Add more reliable mips release detection code.
+
+2023-02-09 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: Update for new bit.tst test, to check the
+       new clor, clzr, ctor and ctzr instructions.
+       * check/all.tst: Update to verify encoding of new instructions.
+       * check/lightning.c: Update to have the lightning "assembler"
+       understanding the new instructions.
+       * include/lightning.h.in: Define new codes for new instructions.
+       * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+       lib/jit_ia64.c, lib/jit_loongarch.c, lib/jit_mips.c, lib/jit_ppc.c,
+       lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c:
+       Implement fallback version of new instructions.
+       * lib/jit_fallback.c: Actual implementation of the fallbacks of
+       the new instructions.
+       * lib/jit_names.c: Update to print debug information of new
+       instructions.
+
+2023-01-26 Paulo Andrade <pcpa@gnu.org>
+
+       * check/riprel.c, check/riprel.ok: New check files.
+       * check/Makefile.am: Support for new riprel test.
+       * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86.c: Implement
+       %rip relative addressing when reliable. Currently disabled for
+       x32 and _WIN32; could be added for positive relative addresses
+       only where it should work.
+       * lib/lightning.c: Correct problem added in previous patch due
+       to not testing on a 32 bit environment.
+
+2023-01-23 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Use pseudo instructions
+       "b" (BEQ(0,0,disp)) and "bal" (BGEZAL(0,disp)) for mips2, when an
+       unconditional branch or function call is known to be in range of a
+       relative jump. This should significantly reduce the generated jit size.
+
+2023-01-20 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_rewind.c: Adapt
+       code to implement a variable framesize and optimize frame pointer
+       for simple leaf functions.
+
+2023-01-19 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_riscv.c, lib/jit_riscv-cpu.c: Adapt code to use a
+       variable framesize. Previously it was aligning the stack at
+       8 bytes, not 16. Now functions are called with a 16 byte aligned
+       stack.
+
+2023-01-18 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h: Include new framesize field
+       of jit_compiler_t; add new alist field for jit_function_t; add
+       new cvt_offset and need_stack fields specific to x86.
+       * lib/jit_x86.c, lib/jit_x86-cpu.c: Rewrite code to create stack
+       frames, so that less stack space can be used if no, or very few
+       callee save registers are modified in a function.
+       * lib/jit_x86-sse.c, lib/jit_x86-x87.c: Make CVT_OFFSET variable, and
+       dynamically allocated; this is required to avoid needing to
+       modify twice %rsp at function prologs, even if no stack space
+       is used.
+
+2022-11-09 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Add new --enable-devel-strong-type-checking
+       option.
+       * include/lightning.h.in: Rework to not need to know if
+       PACKED_STACK is defined, and add a new argument to _jit_arg,
+       _jit_putarg{r,i}, _jit_pusharg{r,i} and _jit_ret{r,i} to have
+       the same code path if PACKED_STACK is defined or not, and also
+       to implement STRONG_TYPE_CHECK enabled with the new
+       --enable-devel-strong-type-checking.
+       * include/lightning/jit_private.h: Add new macros to add assertions
+       for STRONG_TYPE_CHECK and avoid pasting tokens in jit_inc_synth*
+       when the token is not a static known value.
+       * lib/jit_aarch64.c: The first implementation of the new code,
+       working correctly in Apple M1 and with and without STRONG_TYPE_CHECK
+       in Linux.
+
+2022-11-08 Paulo Andrade <pcpa@gnu.org>
+
+       Add support for packed stack arguments as used by Apple M1
+       aarch64 cpus. This requires a major redesign in how Lightning
+       works, because contrary to all other supported ports, in this
+       case arguments must be truncated and sign/zero extended if
+       passed in registers, but when receiving the argument, there
+       is no need to truncate and sign/zero extend.
+       Return values are also treated this way. The callee must
+       truncate and sign/zero extend, not the caller.
+       * check/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS.
+       * check/all.tst: Implement paired arg/getarg/pusharg/putarg/ret
+       codes to validate they do not generate assertions.
+       * check/allocar.tst, check/call.tst, check/fib.tst, check/put.tst,
+       check/stack.tst: Update to pass in all build types.
+       * check/lightning.c: Add support for the extra codes that handle
+       packed stack.
+       * configure.ac: Add a preprocessor define to know if a packed
+       stack is required. This is not really used, as it was moved to
+       jit_aarch64.h.
+       * doc/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS.
+       * doc/rpn.c: Update to pass in all build types.
+       * include/lightning.h.in: Add new codes and reorder enum.
+       * include/lightning/jit_aarch64.h: Detect condition of needing
+       a packed stack.
+       * lib/jit_aarch64-sz.c: Regenerate.
+       * lib/jit_aarch64.c: Major updates for packed stack.
+       * lib/jit_names.c: Updates for debug output.
+       * lib/lightning.c: Update for new codes.
+
+2022-10-31  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       Add new skip instruction.
+       * .gitignore: Update from Gnulib.
+       * check/Makefile.am: Add tests.
+       * check/lightning.c: Handle skip instructions.
+       * check/protect.c: Rewrite with skip.
+       * check/skip.ok: New test.
+       * check/skip.tst: New test.
+       * doc/body.texi: Document the skip instruction.
+       * include/lightning.h.in: Add the skip instruction.
+       * lib/jit_aarch64-sz.c: Update for skip instruction.
+       * lib/jit_aarch64.c: Implement skip instruction.
+       * lib/jit_alpha-sz.c: Update for skip instruction.
+       * lib/jit_alpha.c: Implement skip instruction.
+       * lib/jit_arm-sz.c: Update for skip instruction.
+       * lib/jit_arm.c: Implement skip instruction.
+       * lib/jit_hppa-sz.c: Update for skip instruction.
+       * lib/jit_hppa.c: Implement skip instruction.
+       * lib/jit_ia64-sz.c: Update for skip instruction.
+       * lib/jit_ia64.c: Implement skip instruction.
+       * lib/jit_loongarch-sz.c: Update for skip instruction.
+       * lib/jit_loongarch.c: Implement skip instruction.
+       * lib/jit_mips-sz.c: Update for skip instruction.
+       * lib/jit_mips.c: Implement skip instruction.
+       * lib/jit_names.c: Update for skip instruction.
+       * lib/jit_ppc-sz.c: Update for skip instruction.
+       * lib/jit_ppc.c: Implement skip instruction.
+       * lib/jit_riscv-sz.c: Update for skip instruction.
+       * lib/jit_riscv.c: Implement skip instruction.
+       * lib/jit_s390-sz.c: Update for skip instruction.
+       * lib/jit_s390.c: Implement skip instruction.
+       * lib/jit_size.c: Treat align and skip in a special way.
+       * lib/jit_sparc-sz.c: Update for skip instruction.
+       * lib/jit_sparc.c: Implement skip instruction.
+       * lib/jit_x86-sz.c: Update for skip instruction.
+       * lib/jit_x86.c: Implement skip instruction.
+       * lib/lightning.c: Classify skip instruction.
+
+2022-10-30  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       Add user-visible functions jit_protect and jit_unprotect.
+       * check/Makefile.am: Add test for jit_protect and jit_unprotect.
+       * check/protect.c: New test.
+       * doc/body.texi: Add documentation for jit_protect and
+       jit_unprotect.
+       * include/lightning.h.in: Add prototypes for jit_protect and
+       jit_unprotect.
+       * include/lightning/jit_private.h: Add a field to store the size
+       of the protected memory.
+       * lib/lightning.c: Remember the size of the protected memory and
+       implement the two new functions.
+
 2022-10-12 Paulo Andrade <pcpa@gnu.org>
 
        * include/lightning/jit_loongarch.h, lib/jit_loongarch-cpu.c,
index 112deae..8dbbaef 100644 (file)
@@ -1,5 +1,5 @@
 #
-# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
index 0e0f1a9..d5737af 100644 (file)
@@ -19,3 +19,4 @@ Holger Hans Peter Freyther      <holger@moiji-mobile.com>
 Jon Arintok                     <jon.arintok@gmail.com>
 Bruno Haible                    <bruno@clisp.org>
 Marc Nieper-Wißkirchen                <marc@nieper-wisskirchen.de>
+Paul Cercueil                   <paul@crapouillou.net>
index 676af02..8b13789 100644 (file)
@@ -1,28 +1 @@
-       * Validate that divrem in jit_x86-cpu.c is not modifying
-       the non result arguments. This is not verified by clobber.tst,
-       as it only checks registers not involved in the operation
-       (because it does not know about values being set as input
-       for the the operation).
 
-       * Write a simple higher level language implementation generating
-       jit with lightning, that could be some lisp or C like language.
-
-       * rerun ./configure --enable-devel-get-jit-size and regenerate
-       the related jit_$arch-sz.c for the ports where nodata is
-       meaningful:
-       hppa            (done)
-       i586            (done)
-       ia64
-       mips o32        (done)
-       mips n32
-       mips n64
-       powerpc 32      (done)
-       powerpc 64      (done)
-       ppc
-       s390x           (done)
-       sparc           (done)
-       x86_64          (done)
-       Missing ones are due to no longer (remote) access to such hosts
-       and may be broken with jit_set_data(..., JIT_DISABLE_DATA).
-       (ia64 hp-ux or linx), (irix mips for 32 or 64 abi), and
-       (darwin ppc).
index 10537b1..c77f5cd 100644 (file)
@@ -1,5 +1,5 @@
 #
-# Copyright 2012-2022 Free Software Foundation, Inc.
+# Copyright 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
 # License for more details.
 #
 
-AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+       -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 
 check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
-       catomic
+       catomic protect riprel
 
 lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 lightning_SOURCES = lightning.c
@@ -46,6 +47,12 @@ cva_list_SOURCES = cva_list.c
 catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB)
 catomic_SOURCES = catomic.c
 
+protect_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+protect_SOURCES = protect.c
+
+riprel_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+riprel_SOURCES = riprel.c
+
 $(top_builddir)/lib/liblightning.la:
        cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
 
@@ -105,8 +112,10 @@ EXTRA_DIST =                               \
        range.tst       range.ok        \
        ranger.tst      ranger.ok       \
        ret.tst         ret.ok          \
+       skip.tst        skip.ok         \
        tramp.tst       tramp.ok        \
        va_list.tst     va_list.ok      \
+       bit.tst         bit.ok          \
        check.sh                        \
        check.x87.sh                    \
        check.arm.sh    check.swf.sh    \
@@ -114,7 +123,8 @@ EXTRA_DIST =                                \
        check.arm4.swf.sh               \
        check.nodata.sh                 \
        check.x87.nodata.sh             \
-       run-test        all.tst
+       run-test        all.tst         \
+       collatz.tst     factorial.tst
 
 base_TESTS =                           \
        3to2 add align allocai          \
@@ -135,8 +145,8 @@ base_TESTS =                                \
        clobber carry call              \
        float jmpr live put             \
        qalu_mul qalu_div               \
-       range ranger ret tramp          \
-       va_list
+       range ranger ret skip tramp     \
+       va_list bit
 
 $(base_TESTS): check.sh
        $(LN_S) $(srcdir)/check.sh $@
@@ -317,13 +327,14 @@ nodata_TESTS =                                            \
        clobber.nodata carry.nodata call.nodata         \
        float.nodata jmpr.nodata tramp.nodata           \
        range.nodata ranger.nodata put.nodata           \
-       va_list.nodata
+       va_list.nodata bit.nodata
 $(nodata_TESTS):       check.nodata.sh
        $(LN_S) $(srcdir)/check.nodata.sh $@
 TESTS += $(nodata_TESTS)
 endif
 
-TESTS += ccall self setcode nodata ctramp carg cva_list catomic
+TESTS += ccall self setcode nodata ctramp carg cva_list catomic \
+         protect riprel
 CLEANFILES = $(TESTS)
 
 #TESTS_ENVIRONMENT=$(srcdir)/run-test;
index ac4fc97..d24f7ae 100644 (file)
@@ -2,15 +2,16 @@
 .code
        prolog
        allocai 32 $buf
-       arg $c
-       arg $uc
-       arg $s
-       arg $us
-       arg $i
+       arg_c $c
+       arg_c $uc
+       arg_s $s
+       arg_s $us
+       arg_i $i
+       arg_i $ui
 #if __WORDSIZE == 64
-       arg $ui
-       arg $l
+       arg_l $l
 #endif
+       arg $a
        getarg_c %r0 $c
        getarg_uc %r0 $uc
        getarg_s %r0 $s
        getarg_ui %r0 $ui
        getarg_l %r0 $l
 #endif
+       getarg %r0 $a
+       putargr_c %r0 $c
+       putargi_c 1 $c
+       putargr_uc %r0 $uc
+       putargi_uc 1 $uc
+       putargr_s %r0 $s
+       putargi_s 1 $s
+       putargr_us %r0 $us
+       putargi_us 1 $us
+       putargr_i %r0 $i
+       putargi_i 1 $ui
+#if __WORDSIZE == 64
+       putargr_ui %r0 $ui
+       putargi_ui 1 $ui
+       putargr_l %r0 $l
+       putargi_l 1 $l
+#endif
+       putargr %r0 $a
+       putargi 1 $a
        addr %r0 %r1 %r2
        addi %r0 %r1 2
        addcr %r0 %r1 %r2
        rshi_u %r0 %r1 2
        negr %r0 %r1
        comr %r0 %r1
+       clor %r0 %r1
+       clzr %r0 %r1
+       ctor %r0 %r1
+       ctzr %r0 %r1
        ltr %r0 %r1 %r2
        lti %r0 %r1 2
        ltr_u %r0 %r1 %r2
@@ -205,6 +229,15 @@ label:
        callr %r0
        calli label
        prepare
+       pushargr_c %r0
+       pushargr_uc %r0
+       pushargr_s %r0
+       pushargr_us %r0
+       pushargr_i %r0
+#if __WORDSIZE == 64
+       pushargr_ui %r0
+       pushargr_l %r0
+#endif
        pushargr %r0
        finishr %r0
        prepare
@@ -212,6 +245,15 @@ label:
        ellipsis
        finishi 0x80000000
        ret
+       retr_c %r1
+       retr_uc %r1
+       retr_s %r1
+       retr_us %r1
+       retr_i %r1
+#if __WORDSIZE == 64
+       retr_ui %r1
+       retr_l %r1
+#endif
        retr %r1
        reti 2
        retval_c %r1
@@ -225,6 +267,8 @@ label:
 #endif
        arg_f $f
        getarg_f %f1 $f
+       putargr_f %f1 $f
+       putargi_f 1.0 $f
        addr_f %f0 %f1 %f2
        addi_f %f0 %f1 0.5
        subr_f %f0 %f1 %f2
@@ -323,6 +367,8 @@ unordi:
        retval_f %f1
        arg_d $f
        getarg_d %f1 $f
+       putargr_d %f1 $f
+       putargi_d 1.0 $f
        addr_d %f0 %f1 %f2
        addi_d %f0 %f1 0.5
        subr_d %f0 %f1 %f2
index e3ee010..1bffef8 100644 (file)
@@ -55,7 +55,7 @@ fill##T##done:                                                        \
 #define fill_us                fill_s
 #define fill_ui                fill_i
 
-#define ARG(  T, N)                    arg    $arg##T##N
+#define ARG(  T, N)                    arg##T $arg##T##N
 #define ARGF( T, N)                    arg##T $arg##T##N
 #define ARG1( K, T)                    ARG##K(T, 0)
 #define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
@@ -74,56 +74,56 @@ fill##T##done:                                                      \
 #define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
 #define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
 #define ARG_c(N)                       ARG##N( , _c)
-#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_uc(N)                      ARG##N( , _c)
 #define ARG_s(N)                       ARG##N( , _s)
-#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_us(N)                      ARG##N( , _s)
 #define ARG_i(N)                       ARG##N( , _i)
-#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_ui(N)                      ARG##N( , _i)
 #define ARG_l(N)                       ARG##N( , _l)
 #define ARG_f(N)                       ARG##N(F, _f)
 #define ARG_d(N)                       ARG##N(F, _d)
 
-#define CHK(N, T, V)                                           \
-       getarg %r0 $arg##T##V                                   \
+#define CHK(N, T, TT, V)                                       \
+       getarg##T %r0 $arg##TT##V                               \
        ldxi##T %r1 %v0 $(V * szof##T)                          \
        beqr N##T##V %r0 %r1                                    \
        calli @abort                                            \
 N##T##V:
-#define CHKF(N, T, V)                                          \
-       getarg##T %f0 $arg##T##V                                \
+#define CHKF(N, T, TT, V)                                      \
+       getarg##T %f0 $arg##TT##V                               \
        ldxi##T %f1 %v0 $(V * szof##T)                          \
        beqr##T N##T##V %f0 %f1                                 \
        calli @abort                                            \
 N##T##V:
 
-#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
-#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
-#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
-#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
-#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
-#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
-#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
-#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
-#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
-#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
-#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
-#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
-#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
-#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
-#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
-#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+#define GET1( K, N, T, TT, V)                          CHK##K(N, T, TT, 0)
+#define GET2( K, N, T, TT, V)  GET1( K, N, T, TT, V)   CHK##K(N, T, TT, 1)
+#define GET3( K, N, T, TT, V)  GET2( K, N, T, TT, V)   CHK##K(N, T, TT, 2)
+#define GET4( K, N, T, TT, V)  GET3( K, N, T, TT, V)   CHK##K(N, T, TT, 3)
+#define GET5( K, N, T, TT, V)  GET4( K, N, T, TT, V)   CHK##K(N, T, TT, 4)
+#define GET6( K, N, T, TT, V)  GET5( K, N, T, TT, V)   CHK##K(N, T, TT, 5)
+#define GET7( K, N, T, TT, V)  GET6( K, N, T, TT, V)   CHK##K(N, T, TT, 6)
+#define GET8( K, N, T, TT, V)  GET7( K, N, T, TT, V)   CHK##K(N, T, TT, 7)
+#define GET9( K, N, T, TT, V)  GET8( K, N, T, TT, V)   CHK##K(N, T, TT, 8)
+#define GET10(K, N, T, TT, V)  GET9( K, N, T, TT, V)   CHK##K(N, T, TT, 9)
+#define GET11(K, N, T, TT, V)  GET10(K, N, T, TT, V)   CHK##K(N, T, TT, 10)
+#define GET12(K, N, T, TT, V)  GET11(K, N, T, TT, V)   CHK##K(N, T, TT, 11)
+#define GET13(K, N, T, TT, V)  GET12(K, N, T, TT, V)   CHK##K(N, T, TT, 12)
+#define GET14(K, N, T, TT, V)  GET13(K, N, T, TT, V)   CHK##K(N, T, TT, 13)
+#define GET15(K, N, T, TT, V)  GET14(K, N, T, TT, V)   CHK##K(N, T, TT, 14)
+#define GET16(K, N, T, TT, V)  GET15(K, N, T, TT, V)   CHK##K(N, T, TT, 15)
 
-#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
-#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
-#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
-#define GET_us(N, M)           GET##N( , us##N, _us, M)
-#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
-#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
-#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
-#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
-#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+#define GET_c(N, M)            GET##N( , c##N,  _c,  _c, M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, _c, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  _s, M)
+#define GET_us(N, M)           GET##N( , us##N, _us, _s, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  _i, M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, _i, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  _l, M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  _f, M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  _d, M)
 
-#define PUSH(  T, V)           pushargi    V
+#define PUSH(  T, V)           pushargi##T V
 #define PUSHF( T, V)           pushargi##T V
 #define PUSH0( K, T)           /**/
 #define PUSH1( K, T)                                   PUSH##K(T, 0)
@@ -161,14 +161,14 @@ test##T##_0:                                                      \
        ret                                                     \
        epilog
 
-#define DEFN(N, M, T)                                          \
+#define DEFN(N, M, T, TT)                                      \
        name test##T##_##N                                      \
 test##T##_##N:                                                 \
        prolog                                                  \
        arg $argp                                               \
        /* stack buffer in %v0 */                               \
        getarg %v0 $argp                                        \
-       ARG##T(N)                                               \
+       ARG##TT(N)                                              \
        /* validate arguments */                                \
        GET##T(N, M)                                            \
        /* heap buffer in %v1 */                                \
@@ -260,24 +260,24 @@ test##T##_17_done:                                                \
        ret                                                     \
        epilog
 
-#define DEF(  T)                                               \
+#define DEF(  T, TT)                                           \
        DEF0( T)                                                \
-       DEFN( 1,  0, T)                                         \
-       DEFN( 2,  1, T)                                         \
-       DEFN( 3,  2, T)                                         \
-       DEFN( 4,  3, T)                                         \
-       DEFN( 5,  4, T)                                         \
-       DEFN( 6,  5, T)                                         \
-       DEFN( 7,  6, T)                                         \
-       DEFN( 8,  7, T)                                         \
-       DEFN( 9,  8, T)                                         \
-       DEFN(10,  9, T)                                         \
-       DEFN(11, 10, T)                                         \
-       DEFN(12, 11, T)                                         \
-       DEFN(13, 12, T)                                         \
-       DEFN(14, 13, T)                                         \
-       DEFN(15, 14, T)                                         \
-       DEFN(16, 15, T)                                         \
+       DEFN( 1,  0, T, TT)                                     \
+       DEFN( 2,  1, T, TT)                                     \
+       DEFN( 3,  2, T, TT)                                     \
+       DEFN( 4,  3, T, TT)                                     \
+       DEFN( 5,  4, T, TT)                                     \
+       DEFN( 6,  5, T, TT)                                     \
+       DEFN( 7,  6, T, TT)                                     \
+       DEFN( 8,  7, T, TT)                                     \
+       DEFN( 9,  8, T, TT)                                     \
+       DEFN(10,  9, T, TT)                                     \
+       DEFN(11, 10, T, TT)                                     \
+       DEFN(12, 11, T, TT)                                     \
+       DEFN(13, 12, T, TT)                                     \
+       DEFN(14, 13, T, TT)                                     \
+       DEFN(15, 14, T, TT)                                     \
+       DEFN(16, 15, T, TT)                                     \
        DEFX(T)
 
 #define CALL(T)                        calli test##T##_17
@@ -323,17 +323,17 @@ memcpy_done:
        FILLF(_f)
        FILLF(_d)
 
-       DEF(_c)
-       DEF(_uc)
-       DEF(_s)
-       DEF(_us)
-       DEF(_i)
+       DEF(_c, _c)
+       DEF(_uc, _c)
+       DEF(_s, _s)
+       DEF(_us, _s)
+       DEF(_i, _i)
 #if __WORDSIZE == 64
-       DEF(_ui)
-       DEF(_l)
+       DEF(_ui, _i)
+       DEF(_l, _l)
 #endif
-       DEF(_f)
-       DEF(_d)
+       DEF(_f, _f)
+       DEF(_d, _d)
 
        name main
 main:
diff --git a/deps/lightning/check/bit.ok b/deps/lightning/check/bit.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/bit.tst b/deps/lightning/check/bit.tst
new file mode 100644 (file)
index 0000000..b721d5c
--- /dev/null
@@ -0,0 +1,881 @@
+/* If the fallback clor, clzr, ctor and ctzr are used, it might be better
+ * to implement them as functions, since each inlined fallback is almost
+ * as large as a function.
+ * Below is an example of how to do it.
+ */
+
+.data  4096
+str_clo:                       /* operation names passed to printf by the debug main */
+.c     "clo"
+str_clz:
+.c     "clz"
+str_cto:
+.c     "cto"
+str_ctz:
+.c     "ctz"
+print_fmt:                     /* debug main passes: name, value, bit string, result */
+#if __WORDSIZE == 64
+.c     "%s (0x%016lx) %s = %d\n"
+#else
+.c     "%s (0x%08lx) %s = %d\n"
+#endif
+ok:                            /* printed by the table-driven main after all checks */
+.c     "ok\n"
+
+#define BIT2(OP, ARG, RES, R0, R1)     /* one check: R0 = OP(R1 = ARG) */ \
+       movi %R1 ARG                    /* load the input constant */   \
+       OP##r %R0 %R1                   /* instruction under test */    \
+       beqi OP##R0##R1##ARG %R0 RES    /* skip the abort if R0 == RES */ \
+       calli @abort                                    \
+OP##R0##R1##ARG:               /* label is unique per OP/register pair/ARG */
+
+#define BIT1(OP, ARG, RES, V0, V1, V2, R0, R1, R2) /* V0 is the destination; every register, V0 included, is tried as source */ \
+       BIT2(OP, ARG, RES, V0, V0)                      \
+       BIT2(OP, ARG, RES, V0, V1)                      \
+       BIT2(OP, ARG, RES, V0, V2)                      \
+       BIT2(OP, ARG, RES, V0, R0)                      \
+       BIT2(OP, ARG, RES, V0, R1)                      \
+       BIT2(OP, ARG, RES, V0, R2)
+
+#define  BIT(OP, ARG, RES, V0, V1, V2, R0, R1, R2) /* rotate the register list so BIT1 gets a different destination each time */ \
+       BIT1(OP, ARG, RES, V1, V2, R0, R1, R2, V0)      \
+       BIT1(OP, ARG, RES, V2, R0, R1, R2, V0, V1)      \
+       BIT1(OP, ARG, RES, R0, R1, R2, V0, V1, V2)      \
+       BIT1(OP, ARG, RES, R1, R2, V0, V1, V2, R0)      \
+       BIT1(OP, ARG, RES, R2, V0, V1, V2, R0, R1)      /* NOTE(review): the unrotated order is absent, so V0 is never a destination - confirm intended */
+
+#define  CLO(ARG, RES)         /* expect clor(ARG) == RES */   \
+        BIT(clo, ARG, RES, v0, v1, v2, r0, r1, r2)
+#define  CLZ(ARG, RES)         /* expect clzr(ARG) == RES */   \
+        BIT(clz, ARG, RES, v0, v1, v2, r0, r1, r2)
+#define  CTO(ARG, RES)         /* expect ctor(ARG) == RES */   \
+        BIT(cto, ARG, RES, v0, v1, v2, r0, r1, r2)
+#define  CTZ(ARG, RES)         /* expect ctzr(ARG) == RES */   \
+        BIT(ctz, ARG, RES, v0, v1, v2, r0, r1, r2)
+
+.code
+       jmpi main
+/*
+ * Function form of the "count trailing ones" fallback: complement the
+ * argument, then count the trailing zeros of the result by calling the
+ * ctz function defined in this file.  An all-ones argument yields
+ * __WORDSIZE without calling ctz.
+ *
+       jit_uword_t cto(jit_uword_t r0) {
+               r0 = ~r0;
+               if (r0 == 0)
+                       r0 = __WORDSIZE;
+               else
+                       r0 = ctz(r0);
+               return r0;
+       } 
+ */
+name cto
+cto:
+       prolog
+       arg $in
+       getarg %r0 $in
+       comr %r0 %r0                    /* r0 = ~r0 */
+       bnei do_cto %r0 0
+       movi %r0 __WORDSIZE             /* ~r0 == 0: the argument was all ones */
+       jmpi done_cto
+do_cto:
+       prepare
+               pushargr %r0
+       finishi ctz                     /* r0 = ctz(~argument) */
+       retval %r0
+done_cto:
+       retr %r0
+       epilog
+
+/*
+ * Function form of the "count leading ones" fallback: complement the
+ * argument, then count the leading zeros of the result by calling the
+ * clz function defined in this file.  An all-ones argument yields
+ * __WORDSIZE without calling clz.
+ *
+       jit_uword_t clo(jit_uword_t r0) {
+               r0 = ~r0;
+               if (r0 == 0)
+                       r0 = __WORDSIZE;
+               else
+                       r0 = clz(r0);
+               return r0;
+       } 
+ */
+name clo
+clo:
+       prolog
+       arg $in
+       getarg %r0 $in
+       comr %r0 %r0                    /* r0 = ~r0 */
+       bnei do_clo %r0 0
+       movi %r0 __WORDSIZE             /* ~r0 == 0: the argument was all ones */
+       jmpi done_clo
+do_clo:
+       prepare
+               pushargr %r0
+       finishi clz                     /* r0 = clz(~argument) */
+       retval %r0
+done_clo:
+       retr %r0
+       epilog
+
+/*
+ * Function form of the "count leading zeros" fallback.  A zero argument
+ * returns __WORDSIZE.  Otherwise the count is built by a binary search:
+ * r2 is a mask over the topmost 32 (64-bit only), 16, 8, 4, 2 and 1
+ * bits; whenever none of the masked bits of r1 is set, r1 is shifted
+ * left by the mask width and that width is added to the count in r0.
+ *
+       jit_uword_t clz(jit_word_t r1) {
+               jit_uword_t     r0, r2;
+               if (r1 == 0)
+                       r0 = __WORDSIZE;
+               else {
+                       r0 = 0;
+       #if __WORDSIZE == 64
+                       r2 = 0xffffffff00000000UL;
+                       if (!(r1 & r2)) {
+                               r1 <<= 32;
+                               r0 += 32;
+                       }
+                       r2 <<= 16;
+       #else
+                       r2 = 0xffff0000UL;
+       #endif
+                       if (!(r1 & r2)) {
+                               r1 <<= 16;
+                               r0 += 16;
+                       }
+                       r2 <<= 8;
+                       if (!(r1 & r2)) {
+                               r1 <<= 8;
+                               r0 += 8;
+                       }
+                       r2 <<= 4;
+                       if (!(r1 & r2)) {
+                               r1 <<= 4;
+                               r0 += 4;
+                       }
+                       r2 <<= 2;
+                       if (!(r1 & r2)) {
+                               r1 <<= 2;
+                               r0 += 2;
+                       }
+                       r2 <<= 1;
+                       if (!(r1 & r2))
+                               r0 += 1;
+               }
+               return r0;
+       } 
+ */
+name clz
+clz:
+       prolog
+       arg $in
+       getarg %r1 $in
+       bnei lun %r1 0
+       reti __WORDSIZE                 /* argument is zero */
+lun:
+       movi %r0 0                      /* r0 accumulates the count */
+#if __WORDSIZE == 64
+       movi %r2 0xffffffff00000000     /* mask: upper 32 bits */
+       bmsr l32 %r1 %r2                /* a masked bit is set: skip this step */
+       lshi %r1 %r1 32
+       addi %r0 %r0 32
+l32:
+       lshi %r2 %r2 16                 /* mask: upper 16 bits */
+#else
+       movi %r2 0xffff0000             /* mask: upper 16 bits */
+#endif
+       bmsr l16 %r1 %r2
+       lshi %r1 %r1 16
+       addi %r0 %r0 16
+l16:
+       lshi %r2 %r2 8                  /* mask: upper 8 bits */
+       bmsr  l8 %r1 %r2
+       lshi %r1 %r1 8
+       addi %r0 %r0 8
+l8:
+       lshi %r2 %r2 4                  /* mask: upper 4 bits */
+       bmsr  l4 %r1 %r2
+       lshi %r1 %r1 4
+       addi %r0 %r0 4
+l4:
+       lshi %r2 %r2 2                  /* mask: upper 2 bits */
+       bmsr  l2 %r1 %r2
+       lshi %r1 %r1 2
+       addi %r0 %r0 2
+l2:
+       lshi %r2 %r2 1                  /* mask: top bit only */
+       bmsr  l1 %r1 %r2
+       addi %r0 %r0 1                  /* last step: r1 is not shifted any more */
+l1:
+       retr %r0
+       epilog
+
+/*
+ * Function form of the "count trailing zeros" fallback.  A zero argument
+ * returns __WORDSIZE.  Otherwise the count is built by a binary search:
+ * r2 is a mask over the lowest 32 (64-bit only), 16, 8, 4, 2 and 1
+ * bits; whenever none of the masked bits of r1 is set, r1 is shifted
+ * right by the mask width and that width is added to the count in r0.
+ *
+       jit_uword_t ctz(jit_uword_t r1) {
+               jit_uword_t     r0, r2;
+               if (r1 == 0)
+                       r0 = __WORDSIZE;
+               else {
+                       r0 = 0;
+       #if __WORDSIZE == 64
+                       r2 = 0xffffffffUL;
+                       if (!(r1 & r2)) {
+                               r1 >>= 32;
+                               r0 += 32;
+                       }
+                       r2 >>= 16;
+       #else
+                       r2 = 0xffffUL;
+       #endif
+                       if (!(r1 & r2)) {
+                               r1 >>= 16;
+                               r0 += 16;
+                       }
+                       r2 >>= 8;
+                       if (!(r1 & r2)) {
+                               r1 >>= 8;
+                               r0 += 8;
+                       }
+                       r2 >>= 4;
+                       if (!(r1 & r2)) {
+                               r1 >>= 4;
+                               r0 += 4;
+                       }
+                       r2 >>= 2;
+                       if (!(r1 & r2)) {
+                               r1 >>= 2;
+                               r0 += 2;
+                       }
+                       r2 >>= 1;
+                       if (!(r1 & r2))
+                               r0 += 1;
+               }
+               return r0;
+       }
+*/
+name   ctz
+ctz:
+       prolog
+       arg $in
+       getarg %r1 $in
+       bnei tun %r1 0
+       reti __WORDSIZE                 /* argument is zero */
+tun:
+       movi %r0 0                      /* count starts at zero for both word sizes */
+#if __WORDSIZE == 64
+       movi %r2 0xffffffff             /* mask: lower 32 bits */
+       bmsr t32 %r1 %r2                /* a masked bit is set: skip this step */
+       rshi_u %r1 %r1 32
+       addi %r0 %r0 32
+t32:
+       rshi %r2 %r2 16                 /* mask: lower 16 bits */
+#else
+       movi %r2 0xffff                 /* mask: lower 16 bits */
+#endif
+       bmsr t16 %r1 %r2
+       rshi_u %r1 %r1 16
+       addi %r0 %r0 16
+t16:
+       rshi %r2 %r2 8                  /* mask: lower 8 bits */
+       bmsr  t8 %r1 %r2
+       rshi_u %r1 %r1 8
+       addi %r0 %r0 8
+t8:
+       rshi %r2 %r2 4                  /* mask: lower 4 bits */
+       bmsr  t4 %r1 %r2
+       rshi_u %r1 %r1 4
+       addi %r0 %r0 4
+t4:
+       rshi %r2 %r2 2                  /* mask: lower 2 bits */
+       bmsr  t2 %r1 %r2
+       rshi_u %r1 %r1 2
+       addi %r0 %r0 2
+t2:
+       rshi %r2 %r2 1                  /* mask: lowest bit only */
+       bmsr  t1 %r1 %r2
+       addi %r0 %r0 1                  /* last step: r1 is not shifted any more */
+t1:
+       retr %r0
+       epilog
+
+/*
+ * Writes the binary representation of a word into a caller supplied
+ * buffer, most significant bit first, as __WORDSIZE '0'/'1' characters
+ * followed by a terminating zero byte, and returns the buffer.  The
+ * buffer therefore needs at least __WORDSIZE + 1 bytes.
+ *
+       char *bitsprint(char *v0, jit_uword_t v1) {
+               jit_uword_t r0, r1;
+               memset(v0, '0', __WORDSIZE);
+               v0[__WORDSIZE] = 0;
+               for (r0 = 1L << (__WORDSIZE - 1), r1 = 0; r0; r0 >>= 1, ++r1) {
+                       if (v1 & r0)
+                               v0[r1] = '1';
+               }
+               return v0;
+       }
+ */
+name bitsprint
+bitsprint:
+       prolog
+       arg $buf
+       arg $val
+       getarg %v0 $buf
+       getarg %v1 $val
+       prepare
+               pushargr %v0
+               pushargi '0'
+               pushargi __WORDSIZE
+       finishi @memset                 /* memset(v0, '0', __WORDSIZE) */
+       movi %r0 0
+       addi %r1 %v0 __WORDSIZE
+       str_c %r1 %r0                   /* v0[__WORDSIZE] = 0 */
+       movi %r0 $(1 << (__WORDSIZE - 1)) /* r0: single-bit probe, starts at the top bit */
+       movi %r1 0                      /* r1: index into the buffer */
+       movi %r2 '1'
+bitloop:
+       bmcr bitzero %v1 %r0            /* probed bit clear: keep the '0' */
+       stxr_c %r1 %v0 %r2              /* v0[r1] = '1' */
+bitzero:
+       addi %r1 %r1 1
+       rshi_u %r0 %r0 1
+       bnei bitloop %r0 0              /* stop once the probe bit is shifted out */
+       retr %v0
+       epilog
+
+/*
+       #if 0
+       int main(int argc, char *argv[]) {
+               jit_uword_t      r0, v0, v1, v2;
+               char             buf[80];
+       #if __WORDSIZE == 64
+               char            *fmt = "%s (0x%016lx) %s = %d\n";
+               v0 = 0x8000000000000000UL;
+               v2 = 0xffffffffffffffffUL;
+       #else
+               char            *fmt = "%s (0x%08lx) %s = %d\n";
+               v0 = 0x80000000UL;
+               v2 = 0xffffffffUL;
+       #endif
+               do {
+                       v1 = v0 - 1;
+                       r0 = clz(v0);
+                       bitsprint(buf, v0);
+                       printf(fmt, "clz", v0, buf, r0);
+                       r0 = clo(v2);
+                       bitsprint(buf, v2);
+                       printf(fmt, "clo", v2, buf, r0);
+                       r0 = ctz(v0);
+                       bitsprint(buf, v0);
+                       printf(fmt, "ctz", v0, buf, r0);
+                       r0 = cto(v1);
+                       bitsprint(buf, v1);
+                       printf(fmt, "cto", v1, buf, r0);
+                       v0 >>= 1;
+                       v2 <<= 1;
+               } while ((jit_word_t)v1 > -1);
+               return 0;
+       }
+       #endif
+ */
+
+/* Make it "#if 1" for a "debug mode", that helps in regenerating tables,
+ * or temporary state while implementing optimized port specific versions.
+ * The debug main loops over three values: v0, a single bit that starts
+ * at the top bit and moves right one position per iteration; v2, which
+ * starts as all ones and is shifted left one position per iteration;
+ * and v1 = v0 - 1.  Each iteration prints, through print_fmt, clz(v0),
+ * clo(v2), ctz(v0) and cto(v1) together with the bitsprint rendering of
+ * the operand.  With CALL_FUNC set to 1 the functions defined above are
+ * called; otherwise the clzr/clor/ctzr/ctor instructions are used. */
+#if 0
+#define CALL_FUNC      1
+       name main
+main:
+       prolog
+       allocai 80 $buf
+#if __WORDSIZE == 64
+       movi %v0 0x8000000000000000
+       movi %v2 0xffffffffffffffff
+#else
+       movi %v0 0x80000000
+       movi %v2 0xffffffff
+#endif
+loop:
+       subi %v1 %v0 1
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v0
+       finishi bitsprint               /* buf = bit string of v0 */
+#if CALL_FUNC
+       prepare
+               pushargr %v0
+       finishi clz
+       retval %r0
+#else
+       clzr %r0 %v0
+#endif
+       addi %r1 %fp $buf               /* recompute the buffer address for printf */
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_clz
+               pushargr %v0
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v2
+       finishi bitsprint               /* buf = bit string of v2 */
+#if CALL_FUNC
+       prepare
+               pushargr %v2
+       finishi clo
+       retval %r0
+#else
+       clor %r0 %v2
+#endif
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_clo
+               pushargr %v2
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v0
+       finishi bitsprint               /* buf = bit string of v0 */
+#if CALL_FUNC
+       prepare
+               pushargr %v0
+       finishi ctz
+       retval %r0
+#else
+       ctzr %r0 %v0
+#endif
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_ctz
+               pushargr %v0
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v1
+       finishi bitsprint               /* buf = bit string of v1 */
+#if CALL_FUNC
+       prepare
+               pushargr %v1
+       finishi cto
+       retval %r0
+#else
+       ctor %r0 %v1
+#endif
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_cto
+               pushargr %v1
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       rshi_u %v0 %v0 1                /* move the single bit one position right */
+       lshi %v2 %v2 1                  /* drop one more one from the low end */
+       bgti loop %v1 -1                /* repeat while v1, read as signed, is > -1 */
+       ret
+       epilog
+#else
+
+       name main
+main:
+       prolog
+#if __WORDSIZE == 32
+       CLZ(0x80000000, 0)
+       CLO(0xffffffff, 32)
+       CTZ(0x80000000, 31)
+       CTO(0x7fffffff, 31)
+       CLZ(0x40000000, 1)
+       CLO(0xfffffffe, 31)
+       CTZ(0x40000000, 30)
+       CTO(0x3fffffff, 30)
+       CLZ(0x20000000, 2)
+       CLO(0xfffffffc, 30)
+       CTZ(0x20000000, 29)
+       CTO(0x1fffffff, 29)
+       CLZ(0x10000000, 3)
+       CLO(0xfffffff8, 29)
+       CTZ(0x10000000, 28)
+       CTO(0x0fffffff, 28)
+       CLZ(0x08000000, 4)
+       CLO(0xfffffff0, 28)
+       CTZ(0x08000000, 27)
+       CTO(0x07ffffff, 27)
+       CLZ(0x04000000, 5)
+       CLO(0xffffffe0, 27)
+       CTZ(0x04000000, 26)
+       CTO(0x03ffffff, 26)
+       CLZ(0x02000000, 6)
+       CLO(0xffffffc0, 26)
+       CTZ(0x02000000, 25)
+       CTO(0x01ffffff, 25)
+       CLZ(0x01000000, 7)
+       CLO(0xffffff80, 25)
+       CTZ(0x01000000, 24)
+       CTO(0x00ffffff, 24)
+       CLZ(0x00800000, 8)
+       CLO(0xffffff00, 24)
+       CTZ(0x00800000, 23)
+       CTO(0x007fffff, 23)
+       CLZ(0x00400000, 9)
+       CLO(0xfffffe00, 23)
+       CTZ(0x00400000, 22)
+       CTO(0x003fffff, 22)
+       CLZ(0x00200000, 10)
+       CLO(0xfffffc00, 22)
+       CTZ(0x00200000, 21)
+       CTO(0x001fffff, 21)
+       CLZ(0x00100000, 11)
+       CLO(0xfffff800, 21)
+       CTZ(0x00100000, 20)
+       CTO(0x000fffff, 20)
+       CLZ(0x00080000, 12)
+       CLO(0xfffff000, 20)
+       CTZ(0x00080000, 19)
+       CTO(0x0007ffff, 19)
+       CLZ(0x00040000, 13)
+       CLO(0xffffe000, 19)
+       CTZ(0x00040000, 18)
+       CTO(0x0003ffff, 18)
+       CLZ(0x00020000, 14)
+       CLO(0xffffc000, 18)
+       CTZ(0x00020000, 17)
+       CTO(0x0001ffff, 17)
+       CLZ(0x00010000, 15)
+       CLO(0xffff8000, 17)
+       CTZ(0x00010000, 16)
+       CTO(0x0000ffff, 16)
+       CLZ(0x00008000, 16)
+       CLO(0xffff0000, 16)
+       CTZ(0x00008000, 15)
+       CTO(0x00007fff, 15)
+       CLZ(0x00004000, 17)
+       CLO(0xfffe0000, 15)
+       CTZ(0x00004000, 14)
+       CTO(0x00003fff, 14)
+       CLZ(0x00002000, 18)
+       CLO(0xfffc0000, 14)
+       CTZ(0x00002000, 13)
+       CTO(0x00001fff, 13)
+       CLZ(0x00001000, 19)
+       CLO(0xfff80000, 13)
+       CTZ(0x00001000, 12)
+       CTO(0x00000fff, 12)
+       CLZ(0x00000800, 20)
+       CLO(0xfff00000, 12)
+       CTZ(0x00000800, 11)
+       CTO(0x000007ff, 11)
+       CLZ(0x00000400, 21)
+       CLO(0xffe00000, 11)
+       CTZ(0x00000400, 10)
+       CTO(0x000003ff, 10)
+       CLZ(0x00000200, 22)
+       CLO(0xffc00000, 10)
+       CTZ(0x00000200, 9)
+       CTO(0x000001ff, 9)
+       CLZ(0x00000100, 23)
+       CLO(0xff800000, 9)
+       CTZ(0x00000100, 8)
+       CTO(0x000000ff, 8)
+       CLZ(0x00000080, 24)
+       CLO(0xff000000, 8)
+       CTZ(0x00000080, 7)
+       CTO(0x0000007f, 7)
+       CLZ(0x00000040, 25)
+       CLO(0xfe000000, 7)
+       CTZ(0x00000040, 6)
+       CTO(0x0000003f, 6)
+       CLZ(0x00000020, 26)
+       CLO(0xfc000000, 6)
+       CTZ(0x00000020, 5)
+       CTO(0x0000001f, 5)
+       CLZ(0x00000010, 27)
+       CLO(0xf8000000, 5)
+       CTZ(0x00000010, 4)
+       CTO(0x0000000f, 4)
+       CLZ(0x00000008, 28)
+       CLO(0xf0000000, 4)
+       CTZ(0x00000008, 3)
+       CTO(0x00000007, 3)
+       CLZ(0x00000004, 29)
+       CLO(0xe0000000, 3)
+       CTZ(0x00000004, 2)
+       CTO(0x00000003, 2)
+       CLZ(0x00000002, 30)
+       CLO(0xc0000000, 2)
+       CTZ(0x00000002, 1)
+       CTO(0x00000001, 1)
+       CLZ(0x00000001, 31)
+       CLO(0x80000000, 1)
+       CTZ(0x00000001, 0)
+       CTO(0x00000000, 0)
+       CLZ(0x00000000, 32)
+       CLO(0x00000000, 0)
+       CTZ(0x00000000, 32)
+       CTO(0xffffffff, 32)
+#else
+       CLZ(0x8000000000000000, 0)
+       CLO(0xffffffffffffffff, 64)
+       CTZ(0x8000000000000000, 63)
+       CTO(0x7fffffffffffffff, 63)
+       CLZ(0x4000000000000000, 1)
+       CLO(0xfffffffffffffffe, 63)
+       CTZ(0x4000000000000000, 62)
+       CTO(0x3fffffffffffffff, 62)
+       CLZ(0x2000000000000000, 2)
+       CLO(0xfffffffffffffffc, 62)
+       CTZ(0x2000000000000000, 61)
+       CTO(0x1fffffffffffffff, 61)
+       CLZ(0x1000000000000000, 3)
+       CLO(0xfffffffffffffff8, 61)
+       CTZ(0x1000000000000000, 60)
+       CTO(0x0fffffffffffffff, 60)
+       CLZ(0x0800000000000000, 4)
+       CLO(0xfffffffffffffff0, 60)
+       CTZ(0x0800000000000000, 59)
+       CTO(0x07ffffffffffffff, 59)
+       CLZ(0x0400000000000000, 5)
+       CLO(0xffffffffffffffe0, 59)
+       CTZ(0x0400000000000000, 58)
+       CTO(0x03ffffffffffffff, 58)
+       CLZ(0x0200000000000000, 6)
+       CLO(0xffffffffffffffc0, 58)
+       CTZ(0x0200000000000000, 57)
+       CTO(0x01ffffffffffffff, 57)
+       CLZ(0x0100000000000000, 7)
+       CLO(0xffffffffffffff80, 57)
+       CTZ(0x0100000000000000, 56)
+       CTO(0x00ffffffffffffff, 56)
+       CLZ(0x0080000000000000, 8)
+       CLO(0xffffffffffffff00, 56)
+       CTZ(0x0080000000000000, 55)
+       CTO(0x007fffffffffffff, 55)
+       CLZ(0x0040000000000000, 9)
+       CLO(0xfffffffffffffe00, 55)
+       CTZ(0x0040000000000000, 54)
+       CTO(0x003fffffffffffff, 54)
+       CLZ(0x0020000000000000, 10)
+       CLO(0xfffffffffffffc00, 54)
+       CTZ(0x0020000000000000, 53)
+       CTO(0x001fffffffffffff, 53)
+       CLZ(0x0010000000000000, 11)
+       CLO(0xfffffffffffff800, 53)
+       CTZ(0x0010000000000000, 52)
+       CTO(0x000fffffffffffff, 52)
+       CLZ(0x0008000000000000, 12)
+       CLO(0xfffffffffffff000, 52)
+       CTZ(0x0008000000000000, 51)
+       CTO(0x0007ffffffffffff, 51)
+       CLZ(0x0004000000000000, 13)
+       CLO(0xffffffffffffe000, 51)
+       CTZ(0x0004000000000000, 50)
+       CTO(0x0003ffffffffffff, 50)
+       CLZ(0x0002000000000000, 14)
+       CLO(0xffffffffffffc000, 50)
+       CTZ(0x0002000000000000, 49)
+       CTO(0x0001ffffffffffff, 49)
+       CLZ(0x0001000000000000, 15)
+       CLO(0xffffffffffff8000, 49)
+       CTZ(0x0001000000000000, 48)
+       CTO(0x0000ffffffffffff, 48)
+       CLZ(0x0000800000000000, 16)
+       CLO(0xffffffffffff0000, 48)
+       CTZ(0x0000800000000000, 47)
+       CTO(0x00007fffffffffff, 47)
+       CLZ(0x0000400000000000, 17)
+       CLO(0xfffffffffffe0000, 47)
+       CTZ(0x0000400000000000, 46)
+       CTO(0x00003fffffffffff, 46)
+       CLZ(0x0000200000000000, 18)
+       CLO(0xfffffffffffc0000, 46)
+       CTZ(0x0000200000000000, 45)
+       CTO(0x00001fffffffffff, 45)
+       CLZ(0x0000100000000000, 19)
+       CLO(0xfffffffffff80000, 45)
+       CTZ(0x0000100000000000, 44)
+       CTO(0x00000fffffffffff, 44)
+       CLZ(0x0000080000000000, 20)
+       CLO(0xfffffffffff00000, 44)
+       CTZ(0x0000080000000000, 43)
+       CTO(0x000007ffffffffff, 43)
+       CLZ(0x0000040000000000, 21)
+       CLO(0xffffffffffe00000, 43)
+       CTZ(0x0000040000000000, 42)
+       CTO(0x000003ffffffffff, 42)
+       CLZ(0x0000020000000000, 22)
+       CLO(0xffffffffffc00000, 42)
+       CTZ(0x0000020000000000, 41)
+       CTO(0x000001ffffffffff, 41)
+       CLZ(0x0000010000000000, 23)
+       CLO(0xffffffffff800000, 41)
+       CTZ(0x0000010000000000, 40)
+       CTO(0x000000ffffffffff, 40)
+       CLZ(0x0000008000000000, 24)
+       CLO(0xffffffffff000000, 40)
+       CTZ(0x0000008000000000, 39)
+       CTO(0x0000007fffffffff, 39)
+       CLZ(0x0000004000000000, 25)
+       CLO(0xfffffffffe000000, 39)
+       CTZ(0x0000004000000000, 38)
+       CTO(0x0000003fffffffff, 38)
+       CLZ(0x0000002000000000, 26)
+       CLO(0xfffffffffc000000, 38)
+       CTZ(0x0000002000000000, 37)
+       CTO(0x0000001fffffffff, 37)
+       CLZ(0x0000001000000000, 27)
+       CLO(0xfffffffff8000000, 37)
+       CTZ(0x0000001000000000, 36)
+       CTO(0x0000000fffffffff, 36)
+       CLZ(0x0000000800000000, 28)
+       CLO(0xfffffffff0000000, 36)
+       CTZ(0x0000000800000000, 35)
+       CTO(0x00000007ffffffff, 35)
+       CLZ(0x0000000400000000, 29)
+       CLO(0xffffffffe0000000, 35)
+       CTZ(0x0000000400000000, 34)
+       CTO(0x00000003ffffffff, 34)
+       CLZ(0x0000000200000000, 30)
+       CLO(0xffffffffc0000000, 34)
+       CTZ(0x0000000200000000, 33)
+       CTO(0x00000001ffffffff, 33)
+       CLZ(0x0000000100000000, 31)
+       CLO(0xffffffff80000000, 33)
+       CTZ(0x0000000100000000, 32)
+       CTO(0x00000000ffffffff, 32)
+       CLZ(0x0000000080000000, 32)
+       CLO(0xffffffff00000000, 32)
+       CTZ(0x0000000080000000, 31)
+       CTO(0x000000007fffffff, 31)
+       CLZ(0x0000000040000000, 33)
+       CLO(0xfffffffe00000000, 31)
+       CTZ(0x0000000040000000, 30)
+       CTO(0x000000003fffffff, 30)
+       CLZ(0x0000000020000000, 34)
+       CLO(0xfffffffc00000000, 30)
+       CTZ(0x0000000020000000, 29)
+       CTO(0x000000001fffffff, 29)
+       CLZ(0x0000000010000000, 35)
+       CLO(0xfffffff800000000, 29)
+       CTZ(0x0000000010000000, 28)
+       CTO(0x000000000fffffff, 28)
+       CLZ(0x0000000008000000, 36)
+       CLO(0xfffffff000000000, 28)
+       CTZ(0x0000000008000000, 27)
+       CTO(0x0000000007ffffff, 27)
+       CLZ(0x0000000004000000, 37)
+       CLO(0xffffffe000000000, 27)
+       CTZ(0x0000000004000000, 26)
+       CTO(0x0000000003ffffff, 26)
+       CLZ(0x0000000002000000, 38)
+       CLO(0xffffffc000000000, 26)
+       CTZ(0x0000000002000000, 25)
+       CTO(0x0000000001ffffff, 25)
+       CLZ(0x0000000001000000, 39)
+       CLO(0xffffff8000000000, 25)
+       CTZ(0x0000000001000000, 24)
+       CTO(0x0000000000ffffff, 24)
+       CLZ(0x0000000000800000, 40)
+       CLO(0xffffff0000000000, 24)
+       CTZ(0x0000000000800000, 23)
+       CTO(0x00000000007fffff, 23)
+       CLZ(0x0000000000400000, 41)
+       CLO(0xfffffe0000000000, 23)
+       CTZ(0x0000000000400000, 22)
+       CTO(0x00000000003fffff, 22)
+       CLZ(0x0000000000200000, 42)
+       CLO(0xfffffc0000000000, 22)
+       CTZ(0x0000000000200000, 21)
+       CTO(0x00000000001fffff, 21)
+       CLZ(0x0000000000100000, 43)
+       CLO(0xfffff80000000000, 21)
+       CTZ(0x0000000000100000, 20)
+       CTO(0x00000000000fffff, 20)
+       CLZ(0x0000000000080000, 44)
+       CLO(0xfffff00000000000, 20)
+       CTZ(0x0000000000080000, 19)
+       CTO(0x000000000007ffff, 19)
+       CLZ(0x0000000000040000, 45)
+       CLO(0xffffe00000000000, 19)
+       CTZ(0x0000000000040000, 18)
+       CTO(0x000000000003ffff, 18)
+       CLZ(0x0000000000020000, 46)
+       CLO(0xffffc00000000000, 18)
+       CTZ(0x0000000000020000, 17)
+       CTO(0x000000000001ffff, 17)
+       CLZ(0x0000000000010000, 47)
+       CLO(0xffff800000000000, 17)
+       CTZ(0x0000000000010000, 16)
+       CTO(0x000000000000ffff, 16)
+       CLZ(0x0000000000008000, 48)
+       CLO(0xffff000000000000, 16)
+       CTZ(0x0000000000008000, 15)
+       CTO(0x0000000000007fff, 15)
+       CLZ(0x0000000000004000, 49)
+       CLO(0xfffe000000000000, 15)
+       CTZ(0x0000000000004000, 14)
+       CTO(0x0000000000003fff, 14)
+       CLZ(0x0000000000002000, 50)
+       CLO(0xfffc000000000000, 14)
+       CTZ(0x0000000000002000, 13)
+       CTO(0x0000000000001fff, 13)
+       CLZ(0x0000000000001000, 51)
+       CLO(0xfff8000000000000, 13)
+       CTZ(0x0000000000001000, 12)
+       CTO(0x0000000000000fff, 12)
+       CLZ(0x0000000000000800, 52)
+       CLO(0xfff0000000000000, 12)
+       CTZ(0x0000000000000800, 11)
+       CTO(0x00000000000007ff, 11)
+       CLZ(0x0000000000000400, 53)
+       CLO(0xffe0000000000000, 11)
+       CTZ(0x0000000000000400, 10)
+       CTO(0x00000000000003ff, 10)
+       CLZ(0x0000000000000200, 54)
+       CLO(0xffc0000000000000, 10)
+       CTZ(0x0000000000000200, 9)
+       CTO(0x00000000000001ff, 9)
+       CLZ(0x0000000000000100, 55)
+       CLO(0xff80000000000000, 9)
+       CTZ(0x0000000000000100, 8)
+       CTO(0x00000000000000ff, 8)
+       CLZ(0x0000000000000080, 56)
+       CLO(0xff00000000000000, 8)
+       CTZ(0x0000000000000080, 7)
+       CTO(0x000000000000007f, 7)
+       CLZ(0x0000000000000040, 57)
+       CLO(0xfe00000000000000, 7)
+       CTZ(0x0000000000000040, 6)
+       CTO(0x000000000000003f, 6)
+       CLZ(0x0000000000000020, 58)
+       CLO(0xfc00000000000000, 6)
+       CTZ(0x0000000000000020, 5)
+       CTO(0x000000000000001f, 5)
+       CLZ(0x0000000000000010, 59)
+       CLO(0xf800000000000000, 5)
+       CTZ(0x0000000000000010, 4)
+       CTO(0x000000000000000f, 4)
+       CLZ(0x0000000000000008, 60)
+       CLO(0xf000000000000000, 4)
+       CTZ(0x0000000000000008, 3)
+       CTO(0x0000000000000007, 3)
+       CLZ(0x0000000000000004, 61)
+       CLO(0xe000000000000000, 3)
+       CTZ(0x0000000000000004, 2)
+       CTO(0x0000000000000003, 2)
+       CLZ(0x0000000000000002, 62)
+       CLO(0xc000000000000000, 2)
+       CTZ(0x0000000000000002, 1)
+       CTO(0x0000000000000001, 1)
+       CLZ(0x0000000000000001, 63)
+       CLO(0x8000000000000000, 1)
+       CTZ(0x0000000000000001, 0)
+       CTO(0x0000000000000000, 0)
+       CLZ(0x0000000000000000, 64)
+       CLO(0x0000000000000000, 0)
+       CTZ(0x0000000000000000, 64)
+       CTO(0xffffffffffffffff, 64)
+#endif
+       prepare
+               pushargi ok
+       finishi @printf 
+       reti 0
+       epilog
+#endif
index 21068b6..40fb041 100644 (file)
@@ -1,10 +1,10 @@
-#define def_wi(i)                      \
+#define def_wi(i, ii)                  \
        name _w##i                      \
 _w##i:                                 \
        prolog                          \
-       arg $arg##i                     \
+       arg##ii $arg##i                 \
        getarg##i %r0 $arg##i           \
-       retr %r0                        \
+       retr##i %r0                     \
        epilog
 #define def_wf(f)                      \
        name _w##f                      \
@@ -15,11 +15,11 @@ _w##f:                                      \
        truncr##f %r0 %f0               \
        retr %r0                        \
        epilog
-#define def_fi(f, i)                   \
+#define def_fi(f, i, ii)               \
        name f##i                       \
 f##i:                                  \
        prolog                          \
-       arg $arg##i                     \
+       arg##ii $arg##i                 \
        getarg##i %r0 $arg##i           \
        extr##f %f0 %r0                 \
        retr##f %f0                     \
@@ -52,33 +52,33 @@ bstr:
 .code
        jmpi main
 
-       def_wi(_c)
-       def_wi(_uc)
-       def_wi(_s)
-       def_wi(_us)
+       def_wi(_c, _c)
+       def_wi(_uc, _c)
+       def_wi(_s, _s)
+       def_wi(_us, _s)
 #if __WORDSIZE == 64
-       def_wi(_i)
-       def_wi(_ui)
+       def_wi(_i, _i)
+       def_wi(_ui, _i)
 #endif
        def_wf(_f)
        def_wf(_d)
-       def_fi(_f, _c)
-       def_fi(_f, _uc)
-       def_fi(_f, _s)
-       def_fi(_f, _us)
-       def_fi(_f, _i)
+       def_fi(_f, _c, _c)
+       def_fi(_f, _uc, _c)
+       def_fi(_f, _s, _s)
+       def_fi(_f, _us, _s)
+       def_fi(_f, _i, _i)
 #if __WORDSIZE == 64
-       def_fi(_f, _ui)
-       def_fi(_f, _l)
+       def_fi(_f, _ui, _i)
+       def_fi(_f, _l, _l)
 #endif
-       def_fi(_d, _c)
-       def_fi(_d, _uc)
-       def_fi(_d, _s)
-       def_fi(_d, _us)
-       def_fi(_d, _i)
+       def_fi(_d, _c, _c)
+       def_fi(_d, _uc, _c)
+       def_fi(_d, _s, _s)
+       def_fi(_d, _us, _s)
+       def_fi(_d, _i, _i)
 #if __WORDSIZE == 64
-       def_fi(_d, _ui)
-       def_fi(_d, _l)
+       def_fi(_d, _ui, _i)
+       def_fi(_d, _l, _l)
 #endif
        def_f(_f)
        def_f(_d)
@@ -91,7 +91,7 @@ main:
 
 #define _call_w(n, i, a, r)            \
        prepare                         \
-               pushargi a              \
+               pushargi##i a           \
        finishi _w##i                   \
        retval %r0                      \
        extr##i %r0 %r0                 \
@@ -111,7 +111,7 @@ _w##f##_##n:
 #define call_wf(n, f, a, r)            _call_wf(n, f, a, r)
 #define _call_fi(n, f, i, a, r)                \
        prepare                         \
-               pushargi a              \
+               pushargi##i a           \
        finishi f##i                    \
        retval##f %f0                   \
        beqi##f f##i##n %f0 r           \
@@ -196,6 +196,7 @@ f##g##n:
        call_wf(__LINE__, _d, c7f, f7f)
        call_wf(__LINE__, _d, wc80, f80)
        call_wf(__LINE__, _d, wc81, f81)
+
        call_fi(__LINE__, _f, _c, c7f, f7f)
        call_fi(__LINE__, _f, _c, c80, f80)
        call_fi(__LINE__, _f, _uc, c7f, f7f)
index 35b897e..6992db4 100644 (file)
@@ -58,8 +58,8 @@ int
 main(int argc, char *argv[])
 {
     void               (*code)(void);
-    jit_node_t         *jmp, *pass;
-    jit_node_t          *jw,  *jf,  *jd;
+    jit_node_t         *jmp, *pass, *fail;
+    jit_node_t          *jw, *jf, *jd;
     jit_int32_t                  s1,   s2,   s3,   s4,   s5,   s6,   s7,   s8,
                          s9,  s10,  s11,  s12,  s13,  s14,  s15,  s16;
     jit_node_t          *a1,  *a2,  *a3,  *a4,  *a5,  *a6,  *a7,  *a8,
@@ -172,10 +172,11 @@ main(int argc, char *argv[])
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
+    fail = jit_forward();
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg(JIT_R0, a##N);                                       \
-       jit_patch_at(jit_beqi(JIT_R0, 17 - N), pass);                   \
+       jit_patch_at(jit_bnei(JIT_R0, 17 - N), fail);                   \
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
@@ -194,6 +195,8 @@ main(int argc, char *argv[])
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
+    jit_patch_at(jit_jmpi(), pass);
+    jit_link(fail);
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
@@ -300,10 +303,11 @@ main(int argc, char *argv[])
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
+    fail = jit_forward();
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg_f(JIT_F0, a##N);                                     \
-       jit_patch_at(jit_beqi_f(JIT_F0, 17 - N), pass);                 \
+       jit_patch_at(jit_bnei_f(JIT_F0, 17 - N), fail);                 \
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
@@ -322,6 +326,8 @@ main(int argc, char *argv[])
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
+    jit_patch_at(jit_jmpi(), pass);
+    jit_link(fail);
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
@@ -428,10 +434,11 @@ main(int argc, char *argv[])
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
+    fail = jit_forward();
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg_d(JIT_F0, a##N);                                     \
-       jit_patch_at(jit_beqi_d(JIT_F0, 17 - N), pass);                 \
+       jit_patch_at(jit_bnei_d(JIT_F0, 17 - N), fail);                 \
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
@@ -450,6 +457,8 @@ main(int argc, char *argv[])
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
+    jit_patch_at(jit_jmpi(), pass);
+    jit_link(fail);
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
@@ -484,6 +493,7 @@ main(int argc, char *argv[])
        jit_pushargi(1);
     }
     jit_patch_at(jit_finishi(NULL), jw);
+
     jit_prepare();
     {
        jit_pushargi_f(16);
@@ -504,6 +514,7 @@ main(int argc, char *argv[])
        jit_pushargi_f(1);
     }
     jit_patch_at(jit_finishi(NULL), jf);
+
     jit_prepare();
     {
        jit_pushargi_d(16);
index ef09076..e1e2ea7 100644 (file)
@@ -150,7 +150,10 @@ main(int argc, char *argv[])
 #define join(tid)                                              \
     /* load pthread_t value in JIT_R0 */                       \
     jit_movi(JIT_R0, (jit_word_t)tids);                                \
-    jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t));         \
+    if (__WORDSIZE == 64 && sizeof(pthread_t) == 4)            \
+       jit_ldxi_i(JIT_R0, JIT_R0, tid * sizeof(pthread_t));    \
+    else                                                       \
+       jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t));      \
     jit_prepare();                                             \
     jit_pushargr(JIT_R0);                                      \
     jit_pushargi((jit_word_t)NULL);                            \
index 9dae256..3491f2e 100644 (file)
 #  define _l15                 _w15
 #endif
 
+#ifndef jit_arg_uc
+#  define jit_arg_uc           jit_arg_c
+#endif
+#ifndef jit_arg_us
+#  define jit_arg_us           jit_arg_s
+#endif
+#ifndef jit_arg_ui
+#  define jit_arg_ui           jit_arg_i
+#endif
+
 /*
  * Types
  */
@@ -624,7 +634,7 @@ main(int argc, char *argv[])
 #define arg15(T)               arg14(T)                a15 = jit_arg##T();
 
 #define get0(B,T,R)            jit_movi##B(R##0,0);
-#define get1(B,T,R)            jit_getarg##B(R##0,a##1);
+#define get1(B,T,R)            jit_getarg##T(R##0,a##1);
 #define get2(B,T,R)                                                    \
        get1(B,T,R);                                                    \
        jit_movr##B(R##1, R##0);                                        \
@@ -707,7 +717,7 @@ main(int argc, char *argv[])
     n##T##N = jit_name(strfy(n##T##N));                                        \
     jit_note("ccall.c", __LINE__);                                     \
     jit_prolog();                                                      \
-    arg##N();                                                          \
+    arg##N(T);                                                         \
     get##N(,T,JIT_R)                                                   \
     jit_extr##T(JIT_R0, JIT_R0);                                       \
     jit_retr(JIT_R0);                                                  \
@@ -777,7 +787,7 @@ main(int argc, char *argv[])
 
 #define calin(T,N)                                                     \
        jit_prepare();                                                  \
-               push##N(                                              \
+               push##N(T)                                              \
        jit_finishi(C##T##N);                                           \
        jit_retval##T(JIT_R0);                                          \
        jmp = jit_beqi(JIT_R0, T##N);                                   \
@@ -826,7 +836,7 @@ main(int argc, char *argv[])
 #undef calfn
 #define calin(T,N)                                                     \
        jit_prepare();                                                  \
-               push##N(                                              \
+               push##N(T)                                              \
        jit_finishi(CJ##T##N);                                          \
        jit_retval##T(JIT_R0);                                          \
        jmp = jit_beqi(JIT_R0, T##N);                                   \
diff --git a/deps/lightning/check/factorial.tst b/deps/lightning/check/factorial.tst
new file mode 100644 (file)
index 0000000..68adbb2
--- /dev/null
@@ -0,0 +1,73 @@
+.data  32
+str:
+.c     "%.0lf\n"
+.code
+       jmpi main
+/*
+ *     double factorial(unsigned long n) {
+ *             double r = 1;
+ *             while (n >= 1) {
+ *                     r *= n;
+ *                     --n;
+ *             }
+ *             return r;
+ *     }
+ */
+factorial:
+       prolog
+       arg $n
+       getarg %r0 $n
+       movi_d %f0 1.0
+       extr_d %f1 %r0
+       movr_d %f2 %f0
+loop:
+       bltr_d done %f1 %f2
+       mulr_d %f0 %f0 %f1
+       subr_d %f1 %f1 %f2
+       jmpi loop
+done:
+       retr_d %f0
+       epilog
+
+/*
+ *     int main(int argc, char *argv[]) {
+ *             unsigned long v;
+ *             double d;
+ *             if (argc == 2)
+ *                     v = strtoul(argv[1], NULL, 0);
+ *             else
+ *                     v = 32;
+ *             d = factorial(v);
+ *             printf("%.0lf\n", d);
+ *             return 0;
+ *     }
+ */
+main:
+       prolog
+       arg $argc
+       arg $argv
+       getarg %r0 $argc
+       bnei default %r0 2
+       getarg %v0 $argv
+       ldxi %r0 %v0 $(__WORDSIZE >> 3)
+       prepare
+               pushargr %r0
+               pushargi 0
+               pushargi 0
+       finishi @strtoul
+       retval %v0
+       jmpi call
+default:
+    movi %v0 32
+call:
+       prepare
+               pushargr %v0
+       finishi factorial
+       retval_d %f0
+       prepare
+               pushargi str
+               ellipsis
+               pushargr_d %f0
+       finishi @printf
+       reti 0
+       epilog
index 0835323..926ee81 100644 (file)
@@ -32,7 +32,7 @@ main:
        arg $argc
        arg $argv
 
-       getarg_i %r0 $argc
+       getarg %r0 $argc
        blei default %r0 1
        getarg %r0 $argv
        addi %r0 %r0 $(__WORDSIZE >> 3)
index 05a0889..69a6caf 100644 (file)
@@ -14,9 +14,9 @@ ok:
 #  define x80                  0x8000000000000000
 #endif
 
-#if __mips__ || __sparc__ || __hppa__ || __riscv
+#if (__mips__ && __mips_isa_rev < 6)  || __sparc__ || __hppa__ || __riscv
 #  define wnan                 x7f
-#elif __arm__ || __aarch64__ || __alpha__ || __loongarch__
+#elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || __aarch64__ || __alpha__ || __loongarch__
 #  define wnan                 0
 #else
 #  define wnan                 x80
index 4f3b052..80ea081 100644 (file)
@@ -270,10 +270,16 @@ static jit_pointer_t get_arg(void);
 static jit_word_t get_imm(void);
 static void live(void);
 static void align(void);       static void name(void);
+static void skip(void);
 static void prolog(void);
 static void frame(void);       static void tramp(void);
 static void ellipsis(void);
 static void allocai(void);     static void allocar(void);
+static void arg_c(void);       static void arg_s(void);
+static void arg_i(void);
+#if __WORDSIZE == 64
+static void arg_l(void);
+#endif
 static void arg(void);
 static void getarg_c(void);    static void getarg_uc(void);
 static void getarg_s(void);    static void getarg_us(void);
@@ -282,6 +288,15 @@ static void getarg_i(void);
 static void getarg_ui(void);   static void getarg_l(void);
 #endif
 static void getarg(void);
+static void putargr_c(void);   static void putargi_c(void);
+static void putargr_uc(void);  static void putargi_uc(void);
+static void putargr_s(void);   static void putargi_s(void);
+static void putargr_us(void);  static void putargi_us(void);
+static void putargr_i(void);   static void putargi_i(void);
+#if __WORDSIZE == 64
+static void putargr_ui(void);  static void putargi_ui(void);
+static void putargr_l(void);   static void putargi_l(void);
+#endif
 static void putargr(void);     static void putargi(void);
 static void addr(void);                static void addi(void);
 static void addxr(void);       static void addxi(void);
@@ -306,6 +321,8 @@ static void lshr(void);             static void lshi(void);
 static void rshr(void);                static void rshi(void);
 static void rshr_u(void);      static void rshi_u(void);
 static void negr(void);                static void comr(void);
+static void clor(void);                static void clzr(void);
+static void ctor(void);                static void ctzr(void);
 static void ltr(void);         static void lti(void);
 static void ltr_u(void);       static void lti_u(void);
 static void ler(void);         static void lei(void);
@@ -392,9 +409,30 @@ static void bxsubr_u(void);        static void bxsubi_u(void);
 static void jmpr(void);                static void jmpi(void);
 static void callr(void);       static void calli(void);
 static void prepare(void);
+
+static void pushargr_c(void);  static void pushargi_c(void);
+static void pushargr_uc(void); static void pushargi_uc(void);
+static void pushargr_s(void);  static void pushargi_s(void);
+static void pushargr_us(void); static void pushargi_us(void);
+static void pushargr_i(void);  static void pushargi_i(void);
+#if __WORDSIZE == 64
+static void pushargr_ui(void); static void pushargi_ui(void);
+static void pushargr_l(void);  static void pushargi_l(void);
+#endif
 static void pushargr(void);    static void pushargi(void);
+
 static void finishr(void);     static void finishi(void);
 static void ret(void);
+
+static void retr_c(void);      static void reti_c(void);
+static void retr_uc(void);     static void reti_uc(void);
+static void retr_s(void);      static void reti_s(void);
+static void retr_us(void);     static void reti_us(void);
+static void retr_i(void);      static void reti_i(void);
+#if __WORDSIZE == 64
+static void retr_ui(void);     static void reti_ui(void);
+static void retr_l(void);      static void reti_l(void);
+#endif
 static void retr(void);                static void reti(void);
 static void retval_c(void);    static void retval_uc(void);
 static void retval_s(void);    static void retval_us(void);
@@ -591,10 +629,16 @@ static instr_t              instr_vector[] = {
 #define entry2(name, function) { NULL, name, function }
     entry(live),
     entry(align),      entry(name),
+    entry(skip),
     entry(prolog),
     entry(frame),      entry(tramp),
     entry(ellipsis),
     entry(allocai),    entry(allocar),
+    entry(arg_c),      entry(arg_s),
+    entry(arg_i),
+#if __WORDSIZE == 64
+    entry(arg_l),
+#endif
     entry(arg),
     entry(getarg_c),   entry(getarg_uc),
     entry(getarg_s),   entry(getarg_us),
@@ -603,6 +647,16 @@ static instr_t               instr_vector[] = {
     entry(getarg_ui),  entry(getarg_l),
 #endif
     entry(getarg),
+
+    entry(putargr_c),  entry(putargi_c),
+    entry(putargr_uc), entry(putargi_uc),
+    entry(putargr_s),  entry(putargi_s),
+    entry(putargr_us), entry(putargi_us),
+    entry(putargr_i),  entry(putargi_i),
+#if __WORDSIZE == 64
+    entry(putargr_ui), entry(putargi_ui),
+    entry(putargr_l),  entry(putargi_l),
+#endif
     entry(putargr),    entry(putargi),
     entry(addr),       entry(addi),
     entry(addxr),      entry(addxi),
@@ -627,6 +681,8 @@ static instr_t                instr_vector[] = {
     entry(rshr),       entry(rshi),
     entry(rshr_u),     entry(rshi_u),
     entry(negr),       entry(comr),
+    entry(clor),       entry(clzr),
+    entry(ctor),       entry(ctzr),
     entry(ltr),                entry(lti),
     entry(ltr_u),      entry(lti_u),
     entry(ler),                entry(lei),
@@ -713,9 +769,27 @@ static instr_t               instr_vector[] = {
     entry(jmpr),       entry(jmpi),
     entry(callr),      entry(calli),
     entry(prepare),
+    entry(pushargr_c), entry(pushargi_c),
+    entry(pushargr_uc),        entry(pushargi_uc),
+    entry(pushargr_s), entry(pushargi_s),
+    entry(pushargr_us),        entry(pushargi_us),
+    entry(pushargr_i), entry(pushargi_i),
+#if __WORDSIZE == 64
+    entry(pushargr_ui),        entry(pushargi_ui),
+    entry(pushargr_l), entry(pushargi_l),
+#endif
     entry(pushargr),   entry(pushargi),
     entry(finishr),    entry(finishi),
     entry(ret),
+    entry(retr_c),     entry(reti_c),
+    entry(retr_uc),    entry(reti_uc),
+    entry(retr_s),     entry(reti_s),
+    entry(retr_us),    entry(reti_us),
+    entry(retr_i),     entry(reti_i),
+#if __WORDSIZE == 64
+    entry(retr_ui),    entry(reti_ui),
+    entry(retr_l),     entry(reti_l),
+#endif
     entry(retr),       entry(reti),
     entry(retval_c),   entry(retval_uc),
     entry(retval_s),   entry(retval_us),
@@ -1400,6 +1474,7 @@ live(void) {
     jit_live(parser.regval);
 }
 entry_im(align)
+entry_im(skip)
 entry(prolog)
 entry_im(frame)                        entry_im(tramp)
 entry(ellipsis)
@@ -1413,6 +1488,11 @@ allocai(void) {
     symbol->value.i = i;
 }
 entry_ir_ir(allocar)
+entry_ca(arg_c)                        entry_ca(arg_s)
+entry_ca(arg_i)
+#if __WORDSIZE == 64
+entry_ca(arg_l)
+#endif
 entry_ca(arg)
 entry_ia(getarg_c)             entry_ia(getarg_uc)
 entry_ia(getarg_s)             entry_ia(getarg_us)
@@ -1421,6 +1501,15 @@ entry_ia(getarg_i)
 entry_ia(getarg_ui)            entry_ia(getarg_l)
 #endif
 entry_ia(getarg)
+entry_ia(putargr_c)            entry_ima(putargi_c)
+entry_ia(putargr_uc)           entry_ima(putargi_uc)
+entry_ia(putargr_s)            entry_ima(putargi_s)
+entry_ia(putargr_us)           entry_ima(putargi_us)
+entry_ia(putargr_i)            entry_ima(putargi_i)
+#if __WORDSIZE == 64
+entry_ia(putargr_ui)           entry_ima(putargi_ui)
+entry_ia(putargr_l)            entry_ima(putargi_l)
+#endif
 entry_ia(putargr)              entry_ima(putargi)
 entry_ir_ir_ir(addr)           entry_ir_ir_im(addi)
 entry_ir_ir_ir(addxr)          entry_ir_ir_im(addxi)
@@ -1445,6 +1534,8 @@ entry_ir_ir_ir(lshr)              entry_ir_ir_im(lshi)
 entry_ir_ir_ir(rshr)           entry_ir_ir_im(rshi)
 entry_ir_ir_ir(rshr_u)         entry_ir_ir_im(rshi_u)
 entry_ir_ir(negr)              entry_ir_ir(comr)
+entry_ir_ir(clor)              entry_ir_ir(clzr)
+entry_ir_ir(ctor)              entry_ir_ir(ctzr)
 entry_ir_ir_ir(ltr)            entry_ir_ir_im(lti)
 entry_ir_ir_ir(ltr_u)          entry_ir_ir_im(lti_u)
 entry_ir_ir_ir(ler)            entry_ir_ir_im(lei)
@@ -1578,9 +1669,27 @@ entry_lb_ir_ir(bxsubr_u) entry_lb_ir_im(bxsubi_u)
 entry_ir(jmpr)                 entry_lb(jmpi)
 entry_ir(callr)                        entry_fn(calli)
 entry(prepare)
+entry_ir(pushargr_c)           entry_im(pushargi_c)
+entry_ir(pushargr_uc)          entry_im(pushargi_uc)
+entry_ir(pushargr_s)           entry_im(pushargi_s)
+entry_ir(pushargr_us)          entry_im(pushargi_us)
+entry_ir(pushargr_i)           entry_im(pushargi_i)
+#if __WORDSIZE == 64
+entry_ir(pushargr_ui)          entry_im(pushargi_ui)
+entry_ir(pushargr_l)           entry_im(pushargi_l)
+#endif
 entry_ir(pushargr)             entry_im(pushargi)
 entry_ir(finishr)              entry_fn(finishi)
 entry(ret)
+entry_ir(retr_c)               entry_im(reti_c)
+entry_ir(retr_uc)              entry_im(reti_uc)
+entry_ir(retr_s)               entry_im(reti_s)
+entry_ir(retr_us)              entry_im(reti_us)
+entry_ir(retr_i)               entry_im(reti_i)
+#if __WORDSIZE == 64
+entry_ir(retr_ui)              entry_im(reti_ui)
+entry_ir(retr_l)               entry_im(reti_l)
+#endif
 entry_ir(retr)                 entry_im(reti)
 entry_ir(retval_c)             entry_ir(retval_uc)
 entry_ir(retval_s)             entry_ir(retval_us)
@@ -4257,6 +4366,9 @@ main(int argc, char *argv[])
     opt_short += snprintf(cmdline + opt_short,
                          sizeof(cmdline) - opt_short,
                          " -D__mips__=1");
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__mips_isa_rev=%d", jit_cpu.release);
 #endif
 #if defined(__arm__)
     opt_short += snprintf(cmdline + opt_short,
diff --git a/deps/lightning/check/protect.c b/deps/lightning/check/protect.c
new file mode 100644 (file)
index 0000000..f5ec740
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Simple test of (un)protecting a code buffer.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+
+#define MARKER 10
+
+int
+main(int argc, char *argv[])   /* JIT a routine that reads one byte reserved inside its own code and prints "ok" when that byte equals MARKER. */
+{
+    jit_state_t                 *_jit;
+    jit_node_t           *load, *label, *ok;
+    unsigned char        *ptr;
+    void               (*function)(void);
+    int                          mmap_prot, mmap_flags;   /* NOTE(review): never referenced in this function */
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    jit_prolog();
+
+    load = jit_movi(JIT_R0, 0);   /* placeholder immediate; patched below to refer to `label` */
+    jit_ldr_c(JIT_R0, JIT_R0);   /* load the byte that R0 points at */
+    ok = jit_forward();
+    jit_patch_at(jit_beqi(JIT_R0, MARKER), ok);   /* byte == MARKER: branch to `ok` */
+    jit_prepare();
+    jit_pushargi(1);
+    jit_finishi(exit);   /* otherwise call exit(1) */
+    label = jit_indirect();   /* marks the position of the reserved byte */
+    jit_skip(1);             /* Reserves enough space for a byte.  */
+    jit_patch_at(load, label);   /* make `load` refer to `label` */
+    jit_link(ok);
+    jit_prepare();
+    jit_pushargi((jit_word_t)"%s\n");
+    jit_ellipsis();
+    jit_pushargi((jit_word_t)"ok");
+    jit_finishi(printf);
+
+    function = jit_emit();
+    if (function == NULL)
+       abort();
+
+    jit_unprotect ();   /* NOTE(review): presumably makes the emitted code writable again; confirm against the lightning manual */
+    ptr = jit_address (label);
+    *ptr = MARKER;   /* store MARKER into the reserved byte of the emitted code */
+    jit_protect ();   /* NOTE(review): presumably restores the protection dropped above; confirm */
+
+    jit_clear_state();
+
+    (*function)();   /* expected to take the `ok` path now that the byte holds MARKER */
+
+    jit_destroy_state();
+    finish_jit();
+
+    return (0);
+}
index a7e39e1..65f1580 100644 (file)
@@ -9,49 +9,49 @@ ok:
 putr:
        prolog
        frame 160
-       arg $ac
-       arg $auc
-       arg $as
-       arg $aus
-       arg $ai
+       arg_c $ac
+       arg_c $auc
+       arg_s $as
+       arg_s $aus
+       arg_i $ai
 #if __WORDSIZE == 64
-       arg $aui
-       arg $al
+       arg_i $aui
+       arg_l $al
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
-       arg $_l
-       arg $_ui
+       arg_l $_l
+       arg_i $_ui
 #endif
-       arg $_i
-       arg $_us
-       arg $_s
-       arg $_uc
-       arg $_c
+       arg_i $_i
+       arg_s $_us
+       arg_s $_s
+       arg_c $_uc
+       arg_c $_c
        getarg_c %r0 $ac
        negr %r0 %r0
-       putargr %r0 $ac
+       putargr_c %r0 $ac
        getarg_uc %r0 $auc
        negr %r0 %r0
-       putargr %r0 $auc
+       putargr_uc %r0 $auc
        getarg_s %r0 $as
        negr %r0 %r0
-       putargr %r0 $as
+       putargr_s %r0 $as
        getarg_us %r0 $aus
        negr %r0 %r0
-       putargr %r0 $aus
+       putargr_us %r0 $aus
        getarg_i %r0 $ai
        negr %r0 %r0
-       putargr %r0 $ai
+       putargr_i %r0 $ai
 #if __WORDSIZE == 64
        getarg_ui %r0 $aui
        negr %r0 %r0
-       putargr %r0 $aui
+       putargr_ui %r0 $aui
        getarg_l %r0 $al
        negr %r0 %r0
-       putargr %r0 $al
+       putargr_l %r0 $al
 #endif
        getarg_f %f0 $af
        negr_f %f0 %f0
@@ -65,49 +65,49 @@ putr:
 #if __WORDSIZE == 64
        getarg_l %r0 $_l
        negr %r0 %r0
-       putargr %r0 $_l
+       putargr_l %r0 $_l
        getarg_ui %r0 $_ui
        negr %r0 %r0
-       putargr %r0 $_ui
+       putargr_ui %r0 $_ui
 #endif
        getarg_i %r0 $_i
        negr %r0 %r0
-       putargr %r0 $_i
+       putargr_i %r0 $_i
        getarg_us %r0 $_us
        negr %r0 %r0
-       putargr %r0 $_us
+       putargr_us %r0 $_us
        getarg_s %r0 $_s
        negr %r0 %r0
-       putargr %r0 $_s
+       putargr_s %r0 $_s
        getarg_uc %r0 $_uc
        negr %r0 %r0
-       putargr %r0 $_uc
+       putargr_uc %r0 $_uc
        getarg_c %r0 $_c
        negr %r0 %r0
-       putargr %r0 $_c
+       putargr_c %r0 $_c
        jmpi _putr
 rputr:
-       putargi 17 $ac
-       putargi 16 $auc
-       putargi 15 $as
-       putargi 14 $aus
-       putargi 13 $ai
+       putargi_c 17 $ac
+       putargi_uc 16 $auc
+       putargi_s 15 $as
+       putargi_us 14 $aus
+       putargi_i 13 $ai
 #if __WORDSIZE == 64
-       putargi 12 $aui
-       putargi 11 $al
+       putargi_ui 12 $aui
+       putargi_l 11 $al
 #endif
        putargi_f 10 $af
        putargi_d 9 $ad
        putargi 8 $a
 #if __WORDSIZE == 64
-       putargi 7 $_l
-       putargi 6 $_ui
+       putargi_l 7 $_l
+       putargi_ui 6 $_ui
 #endif
-       putargi 5 $_i
-       putargi 4 $_us
-       putargi 3 $_s
-       putargi 2 $_uc
-       putargi 1 $_c
+       putargi_i 5 $_i
+       putargi_us 4 $_us
+       putargi_s 3 $_s
+       putargi_uc 2 $_uc
+       putargi_c 1 $_c
        jmpi _puti
 rputi:
        ret
@@ -117,27 +117,27 @@ rputi:
 _putr:
        prolog
        tramp 160
-       arg $ac
-       arg $auc
-       arg $as
-       arg $aus
-       arg $ai
+       arg_c $ac
+       arg_c $auc
+       arg_s $as
+       arg_s $aus
+       arg_i $ai
 #if __WORDSIZE == 64
-       arg $aui
-       arg $al
+       arg_i $aui
+       arg_l $al
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
-       arg $_l
-       arg $_ui
+       arg_l $_l
+       arg_i $_ui
 #endif
-       arg $_i
-       arg $_us
-       arg $_s
-       arg $_uc
-       arg $_c
+       arg_i $_i
+       arg_s $_us
+       arg_s $_s
+       arg_c $_uc
+       arg_c $_c
        getarg_c %r0 $ac
        beqi rac %r0 -1
        calli @abort
@@ -181,7 +181,7 @@ rad:
        calli @abort
 ra:
 #if __WORDSIZE == 64
-       getarg %r0 $_l
+       getarg_l %r0 $_l
        beqi r_l %r0 -11
        calli @abort
 r_l:
@@ -217,27 +217,27 @@ r_c:
 _puti:
        prolog
        tramp 160
-       arg $ac
-       arg $auc
-       arg $as
-       arg $aus
-       arg $ai
+       arg_c $ac
+       arg_c $auc
+       arg_s $as
+       arg_s $aus
+       arg_i $ai
 #if __WORDSIZE == 64
-       arg $aui
-       arg $al
+       arg_i $aui
+       arg_l $al
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
-       arg $_l
-       arg $_ui
+       arg_l $_l
+       arg_i $_ui
 #endif
-       arg $_i
-       arg $_us
-       arg $_s
-       arg $_uc
-       arg $_c
+       arg_i $_i
+       arg_s $_us
+       arg_s $_s
+       arg_c $_uc
+       arg_c $_c
        getarg_c %r0 $ac
        beqi iac %r0 17
        calli @abort
@@ -281,7 +281,7 @@ iad:
        calli @abort
 ia:
 #if __WORDSIZE == 64
-       getarg %r0 $_l
+       getarg_l %r0 $_l
        beqi i_l %r0 7
        calli @abort
 i_l:
@@ -390,27 +390,27 @@ fd2:
 main:
        prolog
        prepare
-               pushargi 1
-               pushargi 2
-               pushargi 3
-               pushargi 4
-               pushargi 5
+               pushargi_c  1
+               pushargi_uc 2
+               pushargi_s  3
+               pushargi_us 4
+               pushargi_i  5
 #if __WORDSIZE == 64
-               pushargi 6
-               pushargi 7
+               pushargi_ui 6
+               pushargi_l  7
 #endif
-               pushargi_f 8
-               pushargi_d 9
-               pushargi 10
+               pushargi_f  8
+               pushargi_d  9
+               pushargi    10
 #if __WORDSIZE == 64
-               pushargi 11
-               pushargi 12
+               pushargi_l  11
+               pushargi_ui 12
 #endif
-               pushargi 13
-               pushargi 14
-               pushargi 15
-               pushargi 16
-               pushargi 17
+               pushargi_i  13
+               pushargi_us 14
+               pushargi_s  15
+               pushargi_uc 16
+               pushargi_c  17
        finishi putr
        prepare
                pushargi 1
diff --git a/deps/lightning/check/riprel.c b/deps/lightning/check/riprel.c
new file mode 100644 (file)
index 0000000..c776e2a
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Simple test for x86_64 rip relative access that can also be useful
+ * on other ports when data is close to instruction pointer.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/mman.h>
+#if defined(__sgi)
+#  include <fcntl.h>
+#endif
+#include <unistd.h>
+
+#ifndef MAP_ANON
+#  define MAP_ANON                     MAP_ANONYMOUS
+#  ifndef MAP_ANONYMOUS
+#    define MAP_ANONYMOUS              0
+#  endif
+#endif
+
+#if !defined(__sgi)
+#define  mmap_fd                       -1
+#endif
+
+int
+main(int argc, char *argv[])   /* Map data and code pages in one region, JIT stores/loads against the data page, then run the code. */
+{
+    jit_uint8_t                 *ptr;
+    jit_state_t                 *_jit;
+    jit_word_t           length;   /* NOTE(review): never referenced in this function */
+#if defined(__sgi)
+    int                          mmap_fd;
+#endif
+    void               (*function)(void);
+    int                          mmap_prot, mmap_flags, result, pagesize;
+    int                          mult;   /* pages mapped: one data page plus (mult - 1) pages handed to jit_set_code() */
+
+#if defined(__ia64__)
+    mult = 8;
+#else
+    mult = 2;
+#endif
+    pagesize = sysconf(_SC_PAGESIZE);
+    if (pagesize < 4096)   /* use at least 4096 bytes per page */
+       pagesize = 4096;
+
+#if defined(__sgi)
+    mmap_fd = open("/dev/zero", O_RDWR);   /* on __sgi a /dev/zero descriptor is passed to mmap() */
+#endif
+
+    mmap_prot = PROT_READ | PROT_WRITE;
+#if !(__OpenBSD__ || __APPLE__)
+    mmap_prot |= PROT_EXEC;   /* not requested up front on OpenBSD/Apple; the code page is switched to R+X before the call below */
+#endif
+#if __NetBSD__
+    mmap_prot = PROT_MPROTECT(mmap_prot);
+    mmap_flags = 0;
+#else
+    mmap_flags = MAP_PRIVATE;
+#endif
+    mmap_flags |= MAP_ANON;
+    ptr = mmap(NULL, pagesize * mult,  mmap_prot, mmap_flags, mmap_fd, 0);   /* first page holds the test data, the rest receives the code */
+    assert(ptr != MAP_FAILED);
+#if defined(__sgi)
+    close(mmap_fd);
+#endif
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    jit_prolog();
+    jit_movi(JIT_R0, 'c');
+    jit_sti_c(ptr + 0, JIT_R0);   /* read back below with jit_ldi_c */
+    jit_movi(JIT_R0, 'C');
+    jit_sti_c(ptr + 1, JIT_R0);   /* read back below with jit_ldi_uc */
+
+    jit_movi(JIT_R0, 's');
+    jit_sti_s(ptr + 2, JIT_R0);   /* read back below with jit_ldi_s */
+    jit_movi(JIT_R0, 'S');
+    jit_sti_s(ptr + 4, JIT_R0);   /* read back below with jit_ldi_us */
+
+    jit_movi(JIT_R0, 'i');
+    jit_sti_i(ptr + 8, JIT_R0);   /* read back below with jit_ldi_i */
+#if __WORDSIZE == 64
+    jit_movi(JIT_R0, 'I');
+    jit_sti_i(ptr + 12, JIT_R0);   /* read back below with jit_ldi_ui */
+
+    jit_movi(JIT_R0, 'l');
+    jit_sti_l(ptr + 16, JIT_R0);   /* read back below with jit_ldi_l */
+#endif
+    jit_movi_f(JIT_F0, 1.0);
+    jit_sti_f(ptr + 24, JIT_F0);
+    jit_movi_d(JIT_F0, 2.0);
+    jit_sti_d(ptr + 32, JIT_F0);
+
+    jit_ldi_c(JIT_R0, ptr + 0);
+    jit_ldi_s(JIT_R1, ptr + 2);
+    jit_ldi_i(JIT_R2, ptr + 8);
+#if __WORDSIZE == 64
+    jit_ldi_l(JIT_V0, ptr + 16);
+#endif
+    jit_prepare();
+#if __WORDSIZE == 64
+    jit_pushargi((jit_word_t)"%c %c %c %c\n");
+#else
+    jit_pushargi((jit_word_t)"%c %c %c l\n");   /* 32-bit: a literal 'l' stands in for the jit_ldi_l result */
+#endif
+    jit_ellipsis();
+    jit_pushargr(JIT_R0);
+    jit_pushargr(JIT_R1);
+    jit_pushargr(JIT_R2);
+#if __WORDSIZE == 64
+    jit_pushargr(JIT_V0);
+#endif
+    jit_finishi(printf);
+
+    jit_ldi_uc(JIT_R0, ptr + 1);
+    jit_ldi_us(JIT_R1, ptr + 4);
+#if __WORDSIZE == 64
+    jit_ldi_ui(JIT_R2, ptr + 12);
+#endif
+    jit_prepare();
+#if __WORDSIZE == 64
+    jit_pushargi((jit_word_t)"%c %c %c\n");
+#else
+    jit_pushargi((jit_word_t)"%c %c I\n");   /* 32-bit: a literal 'I' stands in for the jit_ldi_ui result */
+#endif
+    jit_ellipsis();
+    jit_pushargr(JIT_R0);
+    jit_pushargr(JIT_R1);
+#if __WORDSIZE == 64
+    jit_pushargr(JIT_R2);
+#endif
+    jit_finishi(printf);
+
+    jit_ldi_f(JIT_F0, ptr + 24);
+    jit_extr_f_d(JIT_F0, JIT_F0);   /* widen the float to double before pushing it after the ellipsis */
+    jit_ldi_d(JIT_F1, ptr + 32);
+
+    jit_prepare();
+    jit_pushargi((jit_word_t)"%.1f %.1f\n");
+    jit_ellipsis();
+    jit_pushargr_d(JIT_F0);
+    jit_pushargr_d(JIT_F1);
+    jit_finishi(printf);
+
+    jit_realize();
+
+    jit_set_code(ptr + pagesize, pagesize * (mult - 1));   /* emit into the pages that follow the data page */
+
+ #if __NetBSD__ || __OpenBSD__ || __APPLE__
+    result = mprotect(ptr, pagesize, PROT_READ | PROT_WRITE);   /* NOTE(review): covers only the data page; confirm the code area at ptr + pagesize is writable for jit_emit() on NetBSD */
+    assert(result == 0);
+#endif
+    function = jit_emit();
+    if (function == NULL)
+       abort();
+
+    //jit_disassemble();
+    jit_clear_state();
+#if __NetBSD__ ||  __OpenBSD__ || __APPLE__
+    result = mprotect(ptr + pagesize, pagesize, PROT_READ | PROT_EXEC);   /* NOTE(review): one page only, while the code area is pagesize * (mult - 1); the two match only when mult == 2 */
+    assert(result == 0);
+#endif
+    (*function)();
+    jit_destroy_state();
+    finish_jit();
+
+    munmap(ptr, pagesize * mult);
+
+    return (0);
+}
diff --git a/deps/lightning/check/riprel.ok b/deps/lightning/check/riprel.ok
new file mode 100644 (file)
index 0000000..4b90837
--- /dev/null
@@ -0,0 +1,3 @@
+c s i l
+C S I
+1.0 2.0
index 62719ee..08611d9 100644 (file)
@@ -31,14 +31,14 @@ main(int argc, char *argv[])
     int                          mmap_fd;
 #endif
     void               (*function)(void);
-    int                          mmap_prot, mmap_flags;
+    int                          mmap_prot, mmap_flags, result;
 
 #if defined(__sgi)
     mmap_fd = open("/dev/zero", O_RDWR);
 #endif
 
     mmap_prot = PROT_READ | PROT_WRITE;
-#if !__OpenBSD__
+#if !(__OpenBSD__ || __APPLE__)
     mmap_prot |= PROT_EXEC;
 #endif
 #if __NetBSD__
@@ -83,7 +83,8 @@ main(int argc, char *argv[])
        abort();
 
 #if __NetBSD__
-    assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE) == 0);
+    result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE);
+    assert(result == 0);
 #endif
     /* and calling again with enough space works */
     jit_set_code(ptr, 1024 * 1024);
@@ -92,8 +93,9 @@ main(int argc, char *argv[])
        abort();
 
     jit_clear_state();
-#if __NetBSD__ ||  __OpenBSD__
-    assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC) == 0);
+#if __NetBSD__ ||  __OpenBSD__ || __APPLE__
+    result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC);
+    assert(result == 0);
 #endif
     (*function)();
     jit_destroy_state();
diff --git a/deps/lightning/check/skip.ok b/deps/lightning/check/skip.ok
new file mode 100644 (file)
index 0000000..f599e28
--- /dev/null
@@ -0,0 +1 @@
+10
diff --git a/deps/lightning/check/skip.tst b/deps/lightning/check/skip.tst
new file mode 100644 (file)
index 0000000..94eec76
--- /dev/null
@@ -0,0 +1,13 @@
+.data  32
+fmt:
+.c     "%d\n"
+.code
+       prolog
+        skip    4
+        prepare
+                pushargi fmt
+                ellipsis
+                pushargi 10
+        finishi @printf
+        ret
+        epilog
index e699719..1ebe4f5 100644 (file)
@@ -55,7 +55,7 @@ fill##T##done:                                                        \
 #define fill_us                fill_s
 #define fill_ui                fill_i
 
-#define ARG(  T, N)                    arg    $arg##T##N
+#define ARG(  T, N)                    arg##T $arg##T##N
 #define ARGF( T, N)                    arg##T $arg##T##N
 #define ARG1( K, T)                    ARG##K(T, 0)
 #define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
@@ -74,56 +74,56 @@ fill##T##done:                                                      \
 #define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
 #define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
 #define ARG_c(N)                       ARG##N( , _c)
-#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_uc(N)                      ARG##N( , _c)
 #define ARG_s(N)                       ARG##N( , _s)
-#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_us(N)                      ARG##N( , _s)
 #define ARG_i(N)                       ARG##N( , _i)
-#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_ui(N)                      ARG##N( , _i)
 #define ARG_l(N)                       ARG##N( , _l)
 #define ARG_f(N)                       ARG##N(F, _f)
 #define ARG_d(N)                       ARG##N(F, _d)
 
-#define CHK(N, T, V)                                           \
-       getarg %r0 $arg##T##V                                   \
+#define CHK(N, T, TT, V)                                       \
+       getarg##T %r0 $arg##TT##V                               \
        ldxi##T %r1 %v0 $(V * szof##T)                          \
        beqr N##T##V %r0 %r1                                    \
        calli @abort                                            \
 N##T##V:
-#define CHKF(N, T, V)                                          \
-       getarg##T %f0 $arg##T##V                                \
+#define CHKF(N, T, TT, V)                                      \
+       getarg##T %f0 $arg##TT##V                               \
        ldxi##T %f1 %v0 $(V * szof##T)                          \
        beqr##T N##T##V %f0 %f1                                 \
        calli @abort                                            \
 N##T##V:
 
-#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
-#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
-#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
-#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
-#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
-#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
-#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
-#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
-#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
-#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
-#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
-#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
-#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
-#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
-#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
-#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+#define GET1( K, N, T, TT, V)                          CHK##K(N, T, TT, 0)
+#define GET2( K, N, T, TT, V)  GET1( K, N, T, TT, V)   CHK##K(N, T, TT, 1)
+#define GET3( K, N, T, TT, V)  GET2( K, N, T, TT, V)   CHK##K(N, T, TT, 2)
+#define GET4( K, N, T, TT, V)  GET3( K, N, T, TT, V)   CHK##K(N, T, TT, 3)
+#define GET5( K, N, T, TT, V)  GET4( K, N, T, TT, V)   CHK##K(N, T, TT, 4)
+#define GET6( K, N, T, TT, V)  GET5( K, N, T, TT, V)   CHK##K(N, T, TT, 5)
+#define GET7( K, N, T, TT, V)  GET6( K, N, T, TT, V)   CHK##K(N, T, TT, 6)
+#define GET8( K, N, T, TT, V)  GET7( K, N, T, TT, V)   CHK##K(N, T, TT, 7)
+#define GET9( K, N, T, TT, V)  GET8( K, N, T, TT, V)   CHK##K(N, T, TT, 8)
+#define GET10(K, N, T, TT, V)  GET9( K, N, T, TT, V)   CHK##K(N, T, TT, 9)
+#define GET11(K, N, T, TT, V)  GET10(K, N, T, TT, V)   CHK##K(N, T, TT, 10)
+#define GET12(K, N, T, TT, V)  GET11(K, N, T, TT, V)   CHK##K(N, T, TT, 11)
+#define GET13(K, N, T, TT, V)  GET12(K, N, T, TT, V)   CHK##K(N, T, TT, 12)
+#define GET14(K, N, T, TT, V)  GET13(K, N, T, TT, V)   CHK##K(N, T, TT, 13)
+#define GET15(K, N, T, TT, V)  GET14(K, N, T, TT, V)   CHK##K(N, T, TT, 14)
+#define GET16(K, N, T, TT, V)  GET15(K, N, T, TT, V)   CHK##K(N, T, TT, 15)
 
-#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
-#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
-#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
-#define GET_us(N, M)           GET##N( , us##N, _us, M)
-#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
-#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
-#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
-#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
-#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+#define GET_c(N, M)            GET##N( , c##N,  _c,  _c, M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, _c, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  _s, M)
+#define GET_us(N, M)           GET##N( , us##N, _us, _s, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  _i, M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, _i, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  _l, M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  _f, M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  _d, M)
 
-#define PUSH(  T, V)           pushargi    V
+#define PUSH(  T, V)           pushargi##T V
 #define PUSHF( T, V)           pushargi##T V
 #define PUSH0( K, T)           /**/
 #define PUSH1( K, T)                                   PUSH##K(T, 0)
@@ -161,14 +161,14 @@ test##T##_0:                                                      \
        ret                                                     \
        epilog
 
-#define DEFN(N, M, T)                                          \
+#define DEFN(N, M, T, TT)                                      \
        name test##T##_##N                                      \
 test##T##_##N:                                                 \
        prolog                                                  \
        arg $argp                                               \
        /* stack buffer in %v0 */                               \
        getarg %v0 $argp                                        \
-       ARG##T(N)                                               \
+       ARG##TT(N)                                              \
        /* validate arguments */                                \
        GET##T(N, M)                                            \
        /* heap buffer in %v1 */                                \
@@ -258,24 +258,24 @@ test##T##_17_done:                                                \
        ret                                                     \
        epilog
 
-#define DEF(  T)                                               \
+#define DEF(  T, TT)                                           \
        DEF0( T)                                                \
-       DEFN( 1,  0, T)                                         \
-       DEFN( 2,  1, T)                                         \
-       DEFN( 3,  2, T)                                         \
-       DEFN( 4,  3, T)                                         \
-       DEFN( 5,  4, T)                                         \
-       DEFN( 6,  5, T)                                         \
-       DEFN( 7,  6, T)                                         \
-       DEFN( 8,  7, T)                                         \
-       DEFN( 9,  8, T)                                         \
-       DEFN(10,  9, T)                                         \
-       DEFN(11, 10, T)                                         \
-       DEFN(12, 11, T)                                         \
-       DEFN(13, 12, T)                                         \
-       DEFN(14, 13, T)                                         \
-       DEFN(15, 14, T)                                         \
-       DEFN(16, 15, T)                                         \
+       DEFN( 1,  0, T, TT)                                     \
+       DEFN( 2,  1, T, TT)                                     \
+       DEFN( 3,  2, T, TT)                                     \
+       DEFN( 4,  3, T, TT)                                     \
+       DEFN( 5,  4, T, TT)                                     \
+       DEFN( 6,  5, T, TT)                                     \
+       DEFN( 7,  6, T, TT)                                     \
+       DEFN( 8,  7, T, TT)                                     \
+       DEFN( 9,  8, T, TT)                                     \
+       DEFN(10,  9, T, TT)                                     \
+       DEFN(11, 10, T, TT)                                     \
+       DEFN(12, 11, T, TT)                                     \
+       DEFN(13, 12, T, TT)                                     \
+       DEFN(14, 13, T, TT)                                     \
+       DEFN(15, 14, T, TT)                                     \
+       DEFN(16, 15, T, TT)                                     \
        DEFX(T)
 
 #define CALL(T)                        calli test##T##_17
@@ -321,17 +321,17 @@ memcpy_done:
        FILLF(_f)
        FILLF(_d)
 
-       DEF(_c)
-       DEF(_uc)
-       DEF(_s)
-       DEF(_us)
-       DEF(_i)
+       DEF(_c, _c)
+       DEF(_uc, _c)
+       DEF(_s, _s)
+       DEF(_us, _s)
+       DEF(_i, _i)
 #if __WORDSIZE == 64
-       DEF(_ui)
-       DEF(_l)
+       DEF(_ui, _i)
+       DEF(_l, _l)
 #endif
-       DEF(_f)
-       DEF(_d)
+       DEF(_f, _f)
+       DEF(_d, _d)
 
        name main
 main:
index 39d2209..3fb09e7 100644 (file)
@@ -1,5 +1,5 @@
 dnl
-dnl Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
 dnl
 dnl This file is part of GNU lightning.
 dnl
@@ -15,7 +15,7 @@ dnl License for more details.
 dnl
 
 AC_PREREQ([2.71])
-AC_INIT([GNU lightning],[2.1.3],[pcpa@gnu.org],[lightning])
+AC_INIT([GNU lightning],[2.2.1],[pcpa@gnu.org],[lightning])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CANONICAL_TARGET
 AC_CONFIG_SRCDIR([Makefile.am])
@@ -60,6 +60,12 @@ case "$target_cpu" in
                fi                              ;;
            *)                                  ;;
        esac                                    ;;
+    aarch64)
+       case "$host_os" in
+           darwin*)
+               LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DPACKED_STACK=1"                   ;;
+           *)                                  ;;
+       esac                                    ;;
     *)                                         ;;
 esac
 
@@ -163,29 +169,43 @@ if test "x$DEVEL_DISASSEMBLER" != "xno"; then
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEVEL_DISASSEMBLER=1"
 fi
 
+# This option is only useful during development.
+AC_ARG_ENABLE(devel-get-jit-size,
+             AS_HELP_STRING([--enable-devel-get-jit-size],
+                            [Devel mode to regenerate jit size information]),
+             [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
+AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+
 AC_ARG_ENABLE(assertions,
              AS_HELP_STRING([--enable-assertions],
                             [Enable runtime code generation assertions]),
              [DEBUG=$enableval], [DEBUG=auto])
-if test "x$DEBUG" = xyes; then
+
+# This option might be made default in the future
+# Currently it is only useful to ensure existing code will work
+# if PACKED_STACK is also defined.
+AC_ARG_ENABLE(devel-strong-type-checking,
+             AS_HELP_STRING([--enable-devel-strong-type-checking],
+                            [Devel mode for strong type checking]),
+             [STRONG_TYPE_CHECKING=$enableval], [STRONG_TYPE_CHECKING=no])
+if test "x$DEBUG" = xyes -o x"$STRONG_TYPE_CHECKING" = xyes; then
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
 else
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
     DEBUG=no
 fi
+AM_CONDITIONAL(strong_type_checking, [test $STRONG_TYPE_CHECKING = yes])
 
-# This option is only useful during development.
-AC_ARG_ENABLE(devel-get-jit-size,
-             AS_HELP_STRING([--enable-devel-get-jit-size],
-                            [Devel mode to regenerate jit size information]),
-             [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
-AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+AC_CHECK_LIB(dl, dlopen, [HAVE_LIBDL="yes"])
+AC_CHECK_LIB(dld, dlopen, [HAVE_LIBDLD="yes"])
 
-case "$host_os" in
-    *bsd*|osf*)                SHLIB=""        ;;
-    *hpux*)            SHLIB="-ldld"   ;;
-    *)                 SHLIB="-ldl"    ;;
-esac
+if test "x$HAVE_LIBDL" = xyes; then
+    SHLIB="-ldl";
+elif test "x$HAVE_LIBDLD" = xyes; then
+    SHLIB="-ldld";
+else
+    SHLIB="";
+fi
 AC_SUBST(SHLIB)
 
 cpu=
@@ -233,7 +253,7 @@ elif test $cpu = x86; then
     int main(void) {
        int                 ac, flags;
        unsigned int        eax, ebx, ecx, edx;
-       if (__WORDSIZE == 64)
+       if (sizeof(long) == 8)
            return 1;
        __asm__ volatile ("pushfl;\n\t"
                          "popl %0;\n\t"
index 6398bce..4cec67e 100644 (file)
@@ -1,5 +1,5 @@
 #
-# Copyright 2012-2022 Free Software Foundation, Inc.
+# Copyright 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
@@ -14,7 +14,8 @@
 # License for more details.
 #
 
-AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include \
+       -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 
 info_TEXINFOS = lightning.texi
 MOSTLYCLEANFILES = lightning.tmp
index 1d8d277..1bd3f67 100644 (file)
@@ -101,17 +101,30 @@ the @file{configure} shell script; to run it, merely type:
      ./configure
 @end example
 
-@lightning{} supports the @code{--enable-disassembler} option, that
-enables linking to GNU binutils and optionally print human readable
+The @file{configure} accepts the @code{--enable-disassembler} option,
+that enables linking to GNU binutils and optionally print human readable
 disassembly of the jit code. This option can be disabled by the
 @code{--disable-disassembler} option.
 
-Another option that @file{configure} accepts is
-@code{--enable-assertions}, which enables several consistency checks in
-the run-time assemblers.  These are not usually needed, so you can
-decide to simply forget about it; also remember that these consistency
+@file{configure} also accepts the @code{--enable-devel-disassembler}
+option, useful to check exactly what machine instructions were generated
+for a @lightning{} instruction. Basically mixing @code{jit_print} and
+@code{jit_disassembly}.
+
+The @code{--enable-assertions} option enables several consistency
+checks in the run-time assemblers.  These are not usually needed, so you
+can decide to simply forget about it; also remember that these consistency
 checks tend to slow down your code generator.
 
+The @code{--enable-devel-strong-type-checking} option does extra type
+checking using @code{assert}. This option also enables the
+@code{--enable-assertions} unless it is explicitly disabled.
+
+The option @code{--enable-devel-get-jit-size} should only be used
+when doing updates or maintenance to lightning. It regenerates the
+@code{jit_$ARCH-sz.c} file, creating a table of maximum byte usage when
+translating a @lightning{} instruction to machine code.
+
 After you've configured @lightning{}, run @file{make} as usual.
 
 @lightning{} has an extensive set of tests to validate it is working
@@ -278,12 +291,27 @@ These accept two operands, both of which must be registers.
 @example
 negr         _f  _d  O1 = -O2
 comr                 O1 = ~O2
+clor                O1 = number of leading one bits
+clzr                O1 = number of leading zero bits
+ctor                O1 = number of trailing one bits
+ctzr                O1 = number of trailing zero bits
 @end example
 
+Note that @code{ctzr} is basically equivalent to the @code{C} function
+@code{ffs} but indexed at bit zero, not one.
+
+Contrary to @code{__builtin_ctz} and @code{__builtin_clz}, an input
+value of zero is not an error, it just returns the number of bits
+in a word, 64 if @lightning{} generates 64 bit instructions, otherwise
+it returns 32.
+
+The @code{clor} and @code{ctor} are just counterparts of the versions
+that search for zero bits.
+
 These unary ALU operations are only defined for float operands.
 @example
 absr         _f  _d  O1 = fabs(O2)
-sqrtr                O1 = sqrt(O2)
+sqrtr        _f  _d  O1 = sqrt(O2)
 @end example
 
 Besides requiring the @code{r} modifier, there are no unary operations
@@ -401,31 +429,33 @@ ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
 both cases, the first can be either a register or an immediate
 value. Values are sign-extended to fit a whole register.
 @example
-str     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
-sti     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
-stxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
-stxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+str     _c       _s       _i       _l  _f  _d  *O1 = O2
+sti     _c       _s       _i       _l  _f  _d  *O1 = O2
+stxr    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
+stxi    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
 @end example
-As for the load operations, the @code{_ui} and @code{_l} types are
-only available in 64-bit architectures, and for convenience, there
-is a version without a type modifier for integer or pointer operands
-that uses the appropriate wordsize call.
+Note that the unsigned type modifier is not available, as the store
+only writes to the 1, 2, 4 or 8 sized memory address.
+The @code{_l} type is only available in 64-bit architectures, and for
+convenience, there is a version without a type modifier for integer or
+pointer operands that uses the appropriate wordsize call.
 
 @item Argument management
 These are:
 @example
 prepare     (not specified)
 va_start    (not specified)
-pushargr                                   _f  _d
-pushargi                                   _f  _d
+pushargr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+pushargi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 va_push     (not specified)
-arg                                        _f  _d
+arg         _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 va_arg                                         _d
-putargr                                    _f  _d
-putargi                                    _f  _d
+putargr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+putargi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 ret         (not specified)
-retr                                       _f  _d
+retr        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+reti        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 reti                                       _f  _d
 va_end      (not specified)
 retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
@@ -444,6 +474,15 @@ the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
 left to right order}; and use @code{finish} or @code{call} (explained below)
 to perform the actual call.
 
+Note that @code{arg}, @code{pusharg}, @code{putarg} and @code{ret} when
+handling integer types can be used without a type modifier.
+It is suggested to use matching type modifiers to @code{arg}, @code{putarg}
+and @code{getarg} otherwise problems will happen if generating jit for
+environments that require arguments to be truncated and zero or sign
+extended by the caller and/or excess arguments might be passed packed
+in the stack. Currently only Apple systems with @code{aarch64} cpus are
+known to have this restriction.
+
 @code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
 arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
 @code{va_push} is required when passing a @code{va_list} to another function,
@@ -565,6 +604,10 @@ bxsubr    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
 bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
 @end example
 
+Note that the @code{C} code does not have an @code{O1} argument. It is
+required to always use the return value as an argument to @code{patch},
+@code{patch_at} or @code{patch_abs}.
+
 @item Jump and return operations
 These accept one argument except @code{ret} and @code{jmpi} which
 have none; the difference between @code{finishi} and @code{calli}
@@ -603,6 +646,14 @@ the next instruction, usually with a label:
 align     (not specified)                @r{align code}
 @end example
 
+Similar to @code{align} is the next instruction, also usually used with
+a label:
+@example
+skip      (not specified)                @r{skip code}
+@end example
+It is used to specify a minimal number of bytes of nops to be inserted
+before the next instruction.
+
 @code{label} is normally used as @code{patch_at} argument for backward
 jumps.
 
@@ -687,6 +738,10 @@ label2  = jit_indirect();                @rem{/* second entry point */}
           assert(addr2 - addr1 == 16);   @rem{/* only one of the addresses needs to be remembered */}
 @end example
 
+@code{skip} is useful for reserving space in the code buffer that can
+later be filled (possibly with the help of the pair of functions
+@code{jit_unprotect} and @code{jit_protect}).
+
 @item Function prolog
 
 These macros are used to set up a function prolog.  The @code{allocai}
@@ -919,7 +974,7 @@ will return non zero if the argument lives in a register. This call
 is useful to know the live range of register arguments, as those
 are very fast to read and write, but have volatile values.
 
-@code{callee_save_p} exects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
+@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
 @code{JIT_Fn}, and will return non zero if the register is callee
 save. This call is useful because on several ports, the @code{JIT_Rn}
 and @code{JIT_Fn} registers are actually callee save; no need
@@ -1144,26 +1199,13 @@ maps to @code{%g2} on the SPARC).
 @table @b
 @item x86_64
 @example
-    sub   $0x30,%rsp
-    mov   %rbp,(%rsp)
-    mov   %rsp,%rbp
-    sub   $0x18,%rsp
-    mov   %rdi,%rax            mov %rdi, %rax
-    add   $0x1,%rax            inc %rax
-    mov   %rbp,%rsp
-    mov   (%rsp),%rbp
-    add   $0x30,%rsp
-    retq                       retq
+    mov   %rdi,%rax
+    add   $0x1,%rax
+    ret
 @end example
-In this case, the main overhead is due to the function's prolog and
-epilog, and stack alignment after reserving stack space for word
-to/from float conversions or moving data from/to x87 to/from SSE.
-Note that besides allocating space to save callee saved registers,
-no registers are saved/restored because @lightning{} notices those
-registers are not modified. There is currently no logic to detect
-if it needs to allocate stack space for type conversions neither
-proper leaf function detection, but these are subject to change
-(FIXME).
+In this case, for the x86 port, @lightning{} has simple optimizations
+to understand it is a leaf function, and that it is not required to
+create a stack frame nor update the stack pointer.
 @end table
 
 @node printf
@@ -1327,7 +1369,7 @@ jit_node_t *compile_rpn(char *expr)
   in = jit_arg();
   stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
 
-  jit_getarg_i(JIT_R2, in);
+  jit_getarg(JIT_R2, in);
 
   while (*expr) @{
     char buf[32];
@@ -1680,6 +1722,28 @@ Get the current memory allocation function. Also, unlike the GNU GMP
 counterpart, it is an error to pass @code{NULL} pointers as arguments.
 @end deftypefun
 
+@section Protection
+Unless an alternate code buffer is used (see below), @code{jit_emit}
+sets the access protections so that the code buffer's memory can be read and
+executed, but not modified.  One can use the following functions after
+@code{jit_emit} but before @code{jit_clear} to temporarily lift the
+protection:
+
+@deftypefun void jit_unprotect ()
+Changes the access protection so that the code buffer's memory can be read and
+modified.  Before the emitted code can be invoked, @code{jit_protect}
+has to be called to reset the change.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
+@deftypefun void jit_protect ()
+Changes the access protection so that the code buffer's memory can be read and
+executed.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
 @section Alternate code buffer
 To instruct @lightning{} to use an alternate code buffer it is required
 to call @code{jit_realize} before @code{jit_emit}, and then query states
index 8131484..edb3d8e 100644 (file)
@@ -24,7 +24,7 @@ jit_node_t *compile_rpn(char *expr)
 
   fn = jit_note(NULL, 0);
   jit_prolog();
-  in = jit_arg();
+  in = jit_arg_i();
   stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
 
   jit_getarg_i(JIT_R2, in);
index ce622e2..bd48777 100644 (file)
@@ -1,5 +1,5 @@
 #
-# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
index 67c6af1..7aa654c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -190,6 +190,8 @@ typedef enum {
 #define jit_align(u)           jit_new_node_w(jit_code_align, u)
     jit_code_live,             jit_code_align,
     jit_code_save,             jit_code_load,
+#define jit_skip(u)             jit_new_node_w(jit_code_skip, u)
+    jit_code_skip,
 #define jit_name(u)            _jit_name(_jit,u)
     jit_code_name,
 #define jit_note(u, v)         _jit_note(_jit, u, v)
@@ -210,27 +212,80 @@ typedef enum {
 #define jit_allocar(u, v)      _jit_allocar(_jit,u,v)
     jit_code_allocai,          jit_code_allocar,
 
-#define jit_arg()              _jit_arg(_jit)
-    jit_code_arg,
+#define jit_arg_c()            _jit_arg(_jit, jit_code_arg_c)
+#define jit_arg_s()            _jit_arg(_jit, jit_code_arg_s)
+#define jit_arg_i()            _jit_arg(_jit, jit_code_arg_i)
+# if __WORDSIZE == 32
+#  define jit_arg()            jit_arg_i()
+#else
+#  define jit_arg_l()          _jit_arg(_jit, jit_code_arg_l)
+#  define jit_arg()            jit_arg_l()
+#endif
+    jit_code_arg_c,            jit_code_arg_s,
+    jit_code_arg_i,            jit_code_arg_l,
+#if __WORDSIZE == 32
+#  define jit_code_arg         jit_code_arg_i
+#else
+#  define jit_code_arg         jit_code_arg_l
+#endif
+
 #define jit_getarg_c(u,v)      _jit_getarg_c(_jit,u,v)
 #define jit_getarg_uc(u,v)     _jit_getarg_uc(_jit,u,v)
-    jit_code_getarg_c,         jit_code_getarg_uc,
 #define jit_getarg_s(u,v)      _jit_getarg_s(_jit,u,v)
 #define jit_getarg_us(u,v)     _jit_getarg_us(_jit,u,v)
-    jit_code_getarg_s,         jit_code_getarg_us,
 #define jit_getarg_i(u,v)      _jit_getarg_i(_jit,u,v)
 #if __WORDSIZE == 32
 #  define jit_getarg(u,v)      jit_getarg_i(u,v)
 #else
-#  define jit_getarg(u,v)      jit_getarg_l(u,v)
 #  define jit_getarg_ui(u,v)   _jit_getarg_ui(_jit,u,v)
 #  define jit_getarg_l(u,v)    _jit_getarg_l(_jit,u,v)
+#  define jit_getarg(u,v)      jit_getarg_l(u,v)
 #endif
+    jit_code_getarg_c,         jit_code_getarg_uc,
+    jit_code_getarg_s,         jit_code_getarg_us,
     jit_code_getarg_i,         jit_code_getarg_ui,
     jit_code_getarg_l,
-#  define jit_putargr(u,v)     _jit_putargr(_jit,u,v)
-#  define jit_putargi(u,v)     _jit_putargi(_jit,u,v)
-    jit_code_putargr,          jit_code_putargi,
+#if __WORDSIZE == 32
+#  define jit_code_getarg      jit_code_getarg_i
+#else
+#  define jit_code_getarg      jit_code_getarg_l
+#endif
+
+#define jit_putargr_c(u,v)     _jit_putargr(_jit,u,v,jit_code_putargr_c)
+#define jit_putargi_c(u,v)     _jit_putargi(_jit,u,v,jit_code_putargi_c)
+#define jit_putargr_uc(u,v)    _jit_putargr(_jit,u,v,jit_code_putargr_uc)
+#define jit_putargi_uc(u,v)    _jit_putargi(_jit,u,v,jit_code_putargi_uc)
+#define jit_putargr_s(u,v)     _jit_putargr(_jit,u,v,jit_code_putargr_s)
+#define jit_putargi_s(u,v)     _jit_putargi(_jit,u,v,jit_code_putargi_s)
+#define jit_putargr_us(u,v)    _jit_putargr(_jit,u,v,jit_code_putargr_us)
+#define jit_putargi_us(u,v)    _jit_putargi(_jit,u,v,jit_code_putargi_us)
+#define jit_putargr_i(u,v)     _jit_putargr(_jit,u,v,jit_code_putargr_i)
+#define jit_putargi_i(u,v)     _jit_putargi(_jit,u,v,jit_code_putargi_i)
+#if __WORDSIZE == 32
+#  define jit_putargr(u,v)     jit_putargr_i(u,v)
+#  define jit_putargi(u,v)     jit_putargi_i(u,v)
+#else
+#  define jit_putargr_ui(u,v)  _jit_putargr(_jit,u,v,jit_code_putargr_ui)
+#  define jit_putargi_ui(u,v)  _jit_putargi(_jit,u,v,jit_code_putargi_ui)
+#  define jit_putargr_l(u,v)   _jit_putargr(_jit,u,v,jit_code_putargr_l)
+#  define jit_putargi_l(u,v)   _jit_putargi(_jit,u,v,jit_code_putargi_l)
+#  define jit_putargr(u,v)     jit_putargr_l(u,v)
+#  define jit_putargi(u,v)     jit_putargi_l(u,v)
+#endif
+    jit_code_putargr_c,                jit_code_putargi_c,
+    jit_code_putargr_uc,       jit_code_putargi_uc,
+    jit_code_putargr_s,                jit_code_putargi_s,
+    jit_code_putargr_us,       jit_code_putargi_us,
+    jit_code_putargr_i,                jit_code_putargi_i,
+    jit_code_putargr_ui,       jit_code_putargi_ui,
+    jit_code_putargr_l,                jit_code_putargi_l,
+#if __WORDSIZE == 32
+#  define jit_code_putargr     jit_code_putargr_i
+#  define jit_code_putargi     jit_code_putargi_i
+#else
+#  define jit_code_putargr     jit_code_putargr_l
+#  define jit_code_putargi     jit_code_putargi_l
+#endif
 
 #define jit_va_start(u)                jit_new_node_w(jit_code_va_start, u)
     jit_code_va_start,
@@ -352,6 +407,10 @@ typedef enum {
 #define jit_movzr(u,v,w)       jit_new_node_www(jit_code_movzr,u,v,w)
     jit_code_movnr,            jit_code_movzr,
 
+    jit_code_casr,             jit_code_casi,
+#define jit_casr(u, v, w, x)   jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x)   jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
 #define jit_extr_c(u,v)                jit_new_node_ww(jit_code_extr_c,u,v)
 #define jit_extr_uc(u,v)       jit_new_node_ww(jit_code_extr_uc,u,v)
     jit_code_extr_c,           jit_code_extr_uc,
@@ -364,6 +423,18 @@ typedef enum {
 #endif
     jit_code_extr_i,           jit_code_extr_ui,
 
+#define jit_bswapr_us(u,v)     jit_new_node_ww(jit_code_bswapr_us,u,v)
+    jit_code_bswapr_us,
+#define jit_bswapr_ui(u,v)     jit_new_node_ww(jit_code_bswapr_ui,u,v)
+    jit_code_bswapr_ui,
+#define jit_bswapr_ul(u,v)     jit_new_node_ww(jit_code_bswapr_ul,u,v)
+    jit_code_bswapr_ul,
+#if __WORDSIZE == 32
+#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#else
+#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
+#endif
+
 #define jit_htonr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
 #define jit_ntohr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
     jit_code_htonr_us,
@@ -550,33 +621,106 @@ typedef enum {
 
 #define jit_prepare()          _jit_prepare(_jit)
     jit_code_prepare,
-#define jit_pushargr(u)                _jit_pushargr(_jit,u)
-#define jit_pushargi(u)                _jit_pushargi(_jit,u)
-    jit_code_pushargr,         jit_code_pushargi,
+
+#define jit_pushargr_c(u)      _jit_pushargr(_jit,u,jit_code_pushargr_c)
+#define jit_pushargi_c(u)      _jit_pushargi(_jit,u,jit_code_pushargi_c)
+#define jit_pushargr_uc(u)     _jit_pushargr(_jit,u,jit_code_pushargr_uc)
+#define jit_pushargi_uc(u)     _jit_pushargi(_jit,u,jit_code_pushargi_uc)
+#define jit_pushargr_s(u)      _jit_pushargr(_jit,u,jit_code_pushargr_s)
+#define jit_pushargi_s(u)      _jit_pushargi(_jit,u,jit_code_pushargi_s)
+#define jit_pushargr_us(u)     _jit_pushargr(_jit,u,jit_code_pushargr_us)
+#define jit_pushargi_us(u)     _jit_pushargi(_jit,u,jit_code_pushargi_us)
+#define jit_pushargr_i(u)      _jit_pushargr(_jit,u,jit_code_pushargr_i)
+#define jit_pushargi_i(u)      _jit_pushargi(_jit,u,jit_code_pushargi_i)
+#if __WORDSIZE == 32
+#  define jit_pushargr(u)      jit_pushargr_i(u)
+#  define jit_pushargi(u)      jit_pushargi_i(u)
+#else
+#  define jit_pushargr_ui(u)   _jit_pushargr(_jit,u,jit_code_pushargr_ui)
+#  define jit_pushargi_ui(u)   _jit_pushargi(_jit,u,jit_code_pushargi_ui)
+#  define jit_pushargr_l(u)    _jit_pushargr(_jit,u,jit_code_pushargr_l)
+#  define jit_pushargi_l(u)    _jit_pushargi(_jit,u,jit_code_pushargi_l)
+#  define jit_pushargr(u)      jit_pushargr_l(u)
+#  define jit_pushargi(u)      jit_pushargi_l(u)
+#endif
+    jit_code_pushargr_c,       jit_code_pushargi_c,
+    jit_code_pushargr_uc,      jit_code_pushargi_uc,
+    jit_code_pushargr_s,       jit_code_pushargi_s,
+    jit_code_pushargr_us,      jit_code_pushargi_us,
+    jit_code_pushargr_i,       jit_code_pushargi_i,
+    jit_code_pushargr_ui,      jit_code_pushargi_ui,
+    jit_code_pushargr_l,       jit_code_pushargi_l,
+#if __WORDSIZE == 32
+#  define jit_code_pushargr    jit_code_pushargr_i
+#  define jit_code_pushargi    jit_code_pushargi_i
+#else
+#  define jit_code_pushargr    jit_code_pushargr_l
+#  define jit_code_pushargi    jit_code_pushargi_l
+#endif
+
 #define jit_finishr(u)         _jit_finishr(_jit,u)
 #define jit_finishi(u)         _jit_finishi(_jit,u)
     jit_code_finishr,          jit_code_finishi,
 #define jit_ret()              _jit_ret(_jit)
     jit_code_ret,
-#define jit_retr(u)            _jit_retr(_jit,u)
-#define jit_reti(u)            _jit_reti(_jit,u)
-    jit_code_retr,             jit_code_reti,
+
+#define jit_retr_c(u)          _jit_retr(_jit,u,jit_code_retr_c)
+#define jit_reti_c(u)          _jit_reti(_jit,u,jit_code_reti_c)
+#define jit_retr_uc(u)         _jit_retr(_jit,u,jit_code_retr_uc)
+#define jit_reti_uc(u)         _jit_reti(_jit,u,jit_code_reti_uc)
+#define jit_retr_s(u)          _jit_retr(_jit,u,jit_code_retr_s)
+#define jit_reti_s(u)          _jit_reti(_jit,u,jit_code_reti_s)
+#define jit_retr_us(u)         _jit_retr(_jit,u,jit_code_retr_us)
+#define jit_reti_us(u)         _jit_reti(_jit,u,jit_code_reti_us)
+#define jit_retr_i(u)          _jit_retr(_jit,u,jit_code_retr_i)
+#define jit_reti_i(u)          _jit_reti(_jit,u,jit_code_reti_i)
+#if __WORDSIZE == 32
+#  define jit_retr(u)          jit_retr_i(u)
+#  define jit_reti(u)          jit_reti_i(u)
+#else
+#  define jit_retr_ui(u)       _jit_retr(_jit,u,jit_code_retr_ui)
+#  define jit_reti_ui(u)       _jit_reti(_jit,u,jit_code_reti_ui)
+#  define jit_retr_l(u)                _jit_retr(_jit,u,jit_code_retr_l)
+#  define jit_reti_l(u)                _jit_reti(_jit,u,jit_code_reti_l)
+#  define jit_retr(u)          jit_retr_l(u)
+#  define jit_reti(u)          jit_reti_l(u)
+#endif
+    jit_code_retr_c,           jit_code_reti_c,
+    jit_code_retr_uc,          jit_code_reti_uc,
+    jit_code_retr_s,           jit_code_reti_s,
+    jit_code_retr_us,          jit_code_reti_us,
+    jit_code_retr_i,           jit_code_reti_i,
+    jit_code_retr_ui,          jit_code_reti_ui,
+    jit_code_retr_l,           jit_code_reti_l,
+#if __WORDSIZE == 32
+#  define jit_code_retr                jit_code_retr_i
+#  define jit_code_reti                jit_code_reti_i
+#else
+#  define jit_code_retr                jit_code_retr_l
+#  define jit_code_reti                jit_code_reti_l
+#endif
+
 #define jit_retval_c(u)                _jit_retval_c(_jit,u)
 #define jit_retval_uc(u)       _jit_retval_uc(_jit,u)
-    jit_code_retval_c,         jit_code_retval_uc,
 #define jit_retval_s(u)                _jit_retval_s(_jit,u)
 #define jit_retval_us(u)       _jit_retval_us(_jit,u)
-    jit_code_retval_s,         jit_code_retval_us,
 #define jit_retval_i(u)                _jit_retval_i(_jit,u)
 #if __WORDSIZE == 32
 #  define jit_retval(u)                jit_retval_i(u)
 #else
-#  define jit_retval(u)                jit_retval_l(u)
 #  define jit_retval_ui(u)     _jit_retval_ui(_jit,u)
 #  define jit_retval_l(u)      _jit_retval_l(_jit,u)
+#  define jit_retval(u)                jit_retval_l(u)
 #endif
+    jit_code_retval_c,         jit_code_retval_uc,
+    jit_code_retval_s,         jit_code_retval_us,
     jit_code_retval_i,         jit_code_retval_ui,
     jit_code_retval_l,
+#if __WORDSIZE == 32
+#  define jit_code_retval      jit_code_retval_i
+#else
+#  define jit_code_retval      jit_code_retval_l
+#endif
 
 #define jit_epilog()           _jit_epilog(_jit)
     jit_code_epilog,
@@ -904,21 +1048,13 @@ typedef enum {
 #define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
 #define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
 
-#define jit_bswapr_us(u,v)     jit_new_node_ww(jit_code_bswapr_us,u,v)
-    jit_code_bswapr_us,
-#define jit_bswapr_ui(u,v)     jit_new_node_ww(jit_code_bswapr_ui,u,v)
-    jit_code_bswapr_ui,
-#define jit_bswapr_ul(u,v)     jit_new_node_ww(jit_code_bswapr_ul,u,v)
-    jit_code_bswapr_ul,
-#if __WORDSIZE == 32
-#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ui,u,v)
-#else
-#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
-#endif
+#define jit_clor(u,v)          jit_new_node_ww(jit_code_clor,u,v)
+#define jit_clzr(u,v)          jit_new_node_ww(jit_code_clzr,u,v)
+    jit_code_clor,             jit_code_clzr,
 
-    jit_code_casr,             jit_code_casi,
-#define jit_casr(u, v, w, x)   jit_new_node_wwq(jit_code_casr, u, v, w, x)
-#define jit_casi(u, v, w, x)   jit_new_node_wwq(jit_code_casi, u, v, w, x)
+#define jit_ctor(u,v)          jit_new_node_ww(jit_code_ctor,u,v)
+#define jit_ctzr(u,v)          jit_new_node_ww(jit_code_ctzr,u,v)
+    jit_code_ctor,             jit_code_ctzr,
 
     jit_code_last_code
 } jit_code_t;
@@ -960,7 +1096,8 @@ extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t);
 extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
 extern void _jit_ellipsis(jit_state_t*);
 
-extern jit_node_t *_jit_arg(jit_state_t*);
+extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t);
+
 extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
@@ -970,19 +1107,24 @@ extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
 #endif
-extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*);
+
+extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t);
+extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t);
 
 extern void _jit_prepare(jit_state_t*);
 extern void _jit_ellipsis(jit_state_t*);
 extern void _jit_va_push(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargr(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargi(jit_state_t*, jit_word_t);
+
+extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t);
+
 extern void _jit_finishr(jit_state_t*, jit_gpr_t);
 extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
 extern void _jit_ret(jit_state_t*);
-extern void _jit_retr(jit_state_t*, jit_gpr_t);
-extern void _jit_reti(jit_state_t*, jit_word_t);
+
+extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t);
+
 extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
@@ -992,6 +1134,7 @@ extern void _jit_retval_i(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
 #endif
+
 extern void _jit_epilog(jit_state_t*);
 
 #define jit_patch(u)           _jit_patch(_jit,u)
@@ -1016,6 +1159,10 @@ extern void _jit_frame(jit_state_t*, jit_int32_t);
 extern void _jit_tramp(jit_state_t*, jit_int32_t);
 #define jit_emit()             _jit_emit(_jit)
 extern jit_pointer_t _jit_emit(jit_state_t*);
+#define jit_unprotect()         _jit_unprotect(_jit)
+extern void _jit_unprotect(jit_state_t*);
+#define jit_protect()           _jit_protect(_jit)
+extern void _jit_protect(jit_state_t*);
 
 #define jit_print()            _jit_print(_jit)
 extern void _jit_print(jit_state_t*);
index 6a435f1..3086499 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
 #define JIT_HASH_CONSTS                0
 #define JIT_NUM_OPERANDS       3
 
+#if __APPLE__
+#  define PACKED_STACK         1
+#endif
+
 /*
  * Types
  */
index 3593431..7986c34 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index 8f7278d..0ed9535 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -106,6 +106,9 @@ typedef enum {
 
 typedef struct {
     jit_uint32_t version       : 4;
+    /* This field was originally only used for the 'e' in armv5te.
+     * It can also be used to force hardware division when version
+     * is set to 7, indicating that the cpu is armv7r or better. */
     jit_uint32_t extend                : 1;
     /* only generate thumb instructions for thumb2 */
     jit_uint32_t thumb         : 1;
@@ -117,6 +120,12 @@ typedef struct {
      * due to some memory ordering constraint not being respected, so,
      * disable by default */
     jit_uint32_t ldrt_strt     : 1;
+    /* Assume that called functions never match the jit instruction set?
+     * That is, libc, gmp, mpfr, etc. functions are in thumb mode and jit
+     * is in arm mode, or the reverse, which may cause a crash upon return
+     * from that function if the jit generated a relative jump.
+     */
+    jit_uint32_t exchange      : 1;
 } jit_cpu_t;
 
 /*
index afdf21d..df361ba 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index 7b212b9..e45818a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -117,4 +117,13 @@ typedef enum {
     _NOREG,
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t clz           : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_ia64_h */
index 44982ec..89b1a86 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index a2388c9..52aebcc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -25,6 +25,8 @@
 
 #if _MIPS_SIM != _ABIO32
 #    define NEW_ABI            1
+#else
+#    define NEW_ABI            0
 #endif
 
 /*
@@ -114,4 +116,13 @@ typedef enum {
     _NOREG,
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t release       : 4;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_mips_h */
index d3d25d3..460c491 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -22,6 +22,9 @@
 
 #define JIT_HASH_CONSTS                1
 #define JIT_NUM_OPERANDS       3
+#if defined(_AIX) && !defined(_CALL_AIX) && !defined(_CALL_LINUX)
+#  define _CALL_AIXDESC                1
+#endif
 
 /*
  * Types
index d0420b8..444a295 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
 #  define HIDDEN               /**/
 #endif
 
+#if PACKED_STACK || STRONG_TYPE_CHECKING
+#  define assert_arg_type(code, expect)                                        \
+    do assert((code) == (expect)); while (0)
+#  define assert_putarg_type(code, expect)                             \
+    do                                                                 \
+       assert((((code) - jit_code_putargr_c) >> 2) ==                  \
+              ((expect) - jit_code_arg_c));                            \
+    while (0)
+#else
+#  define assert_arg_type(code, expect)                                        \
+    do assert((int)(code) == (int)(expect) ||                          \
+             (code) == jit_code_arg); while (0)
+#  define assert_putarg_type(code, expect)                             \
+    do                                                                 \
+       assert(((((code) - jit_code_putargr_c) >> 2) ==                 \
+              ((expect) - jit_code_arg_c)) ||                          \
+              ((code) == jit_code_arg));                               \
+    while (0)
+#endif
+
 #define rc(value)              jit_class_##value
 #define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
 
@@ -174,48 +194,80 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
     (!jit_regset_tstbit(&_jitc->regarg, regno) &&                      \
      !jit_regset_tstbit(&_jitc->regsav, regno))
 
-#define jit_inc_synth(code)                                            \
+#define jit_code_inc_synth(code)                                       \
     do {                                                               \
-       (void)jit_new_node(jit_code_##code);                            \
+       (void)jit_new_node(code);                                       \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_w(code, u)                                       \
+#define jit_inc_synth(name)                                            \
+    jit_code_inc_synth(jit_code_##name)
+#define jit_code_inc_synth_w(code, u)                                  \
     do {                                                               \
-       (void)jit_new_node_w(jit_code_##code, u);                       \
+       (void)jit_new_node_w(code, u);                                  \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_f(code, u)                                       \
+#define jit_inc_synth_w(name, u)                                       \
+    jit_code_inc_synth_w(jit_code_##name, u)
+#define jit_code_inc_synth_f(code, u)                                  \
     do {                                                               \
-       (void)jit_new_node_f(jit_code_##code, u);                       \
+       (void)jit_new_node_f(code, u);                                  \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_d(code, u)                                       \
+#define jit_inc_synth_f(name, u)                                       \
+    jit_code_inc_synth_f(jit_code_##name, u)
+#define jit_code_inc_synth_d(code, u)                                  \
     do {                                                               \
-       (void)jit_new_node_d(jit_code_##code, u);                       \
+       (void)jit_new_node_d(code, u);                                  \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_ww(code, u, v)                                   \
+#define jit_inc_synth_d(name, u)                                       \
+    jit_code_inc_synth_d(jit_code_##name, u)
+#define jit_code_inc_synth_ww(code, u, v)                              \
     do {                                                               \
-       (void)jit_new_node_ww(jit_code_##code, u, v);                   \
+       (void)jit_new_node_ww(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_wp(code, u, v)                                   \
+#define jit_inc_synth_ww(name, u, v)                                   \
+    jit_code_inc_synth_ww(jit_code_##name, u, v)
+#define jit_code_inc_synth_wp(code, u, v)                              \
     do {                                                               \
-       (void)jit_new_node_wp(jit_code_##code, u, v);                   \
+       (void)jit_new_node_wp(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_fp(code, u, v)                                   \
+#define jit_inc_synth_wp(name, u, v)                                   \
+    jit_code_inc_synth_wp(jit_code_##name, u, v)
+#define jit_code_inc_synth_fp(code, u, v)                              \
     do {                                                               \
-       (void)jit_new_node_fp(jit_code_##code, u, v);                   \
+       (void)jit_new_node_fp(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_dp(code, u, v)                                   \
+#define jit_inc_synth_fp(name, u, v)                                   \
+    jit_code_inc_synth_fp(jit_code_##name, u, v)
+#define jit_code_inc_synth_dp(code, u, v)                              \
     do {                                                               \
-       (void)jit_new_node_dp(jit_code_##code, u, v);                   \
+       (void)jit_new_node_dp(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
+#define jit_inc_synth_dp(name, u, v)                                   \
+    jit_code_inc_synth_dp(jit_code_##name, u, v)
 #define jit_dec_synth()                jit_synth_dec()
 
+#define jit_link_alist(node)                                           \
+    do {                                                               \
+       node->link = _jitc->function->alist;                            \
+       _jitc->function->alist = node;                                  \
+    } while (0)
+#define jit_check_frame()                                              \
+    do {                                                               \
+       if (!_jitc->function->need_frame) {                             \
+           _jitc->again = 1;                                           \
+           _jitc->function->need_frame = 1;                            \
+       }                                                               \
+    } while (0)
+#define jit_diffsize() (stack_framesize - _jitc->framesize)
+#define jit_framesize()        (stack_framesize - jit_diffsize())
+#define jit_selfsize() (_jitc->function->self.size - jit_diffsize())
+
 #define jit_link_prolog()                                              \
     do {                                                               \
        _jitc->tail->link = _jitc->function->prolog->link;              \
@@ -248,8 +300,8 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
 #define jit_class_xpr          0x80000000      /* float / vector */
 /* Used on sparc64 where %f0-%f31 can be encode for single float
  * but %f32 to %f62 only as double precision */
-#define jit_class_sng          0x10000000      /* Single precision float */
-#define jit_class_dbl          0x20000000      /* Only double precision float */
+#define jit_class_sng          0x00010000      /* Single precision float */
+#define jit_class_dbl          0x00020000      /* Only double precision float */
 #define jit_regno_patch                0x00008000      /* this is a register
                                                 * returned by a "user" call
                                                 * to jit_get_reg() */
@@ -474,9 +526,14 @@ struct jit_function {
     } call;
     jit_node_t         *prolog;
     jit_node_t         *epilog;
+    jit_node_t         *alist;
     jit_int32_t                *regoff;
     jit_regset_t        regset;
     jit_int32_t                 stack;
+#if defined(__i386__) || defined(__x86_64__)
+    jit_int32_t                 cvt_offset;    /* allocai'd offset for x87<->xmm or
+                                        * fpr<->gpr transfer using the stack */
+#endif
 
     /* Helper for common jit generation pattern, used in GNU Smalltalk
      * and possibly others, where a static frame layout is required or
@@ -485,11 +542,25 @@ struct jit_function {
     jit_uint32_t        define_frame : 1;
     jit_uint32_t        assume_frame : 1;
 
+    jit_uint32_t        need_frame : 1;        /* need frame pointer? */
+    jit_uint32_t        need_stack : 1;        /* need stack pointer? */
+    jit_uint32_t        need_return : 1;       /* not a leaf function */
+
     /* alloca offset offset */
     jit_int32_t                 aoffoff;
     /* uses allocar flag */
     jit_uint32_t        allocar : 1;
 
+#if __arm__
+    /* If the function will, or might, use float registers and vfp is not
+     * available. Always use the first 64 bytes, as accesses to the virtual
+     * float registers use hardcoded instructions that can only reach 64 byte
+     * displacements and, to keep the code simpler, do not use temporaries. */
+    jit_uint32_t         swf_offset : 1;
+    /* If need to call C functions for some operation, or variadic function */
+    jit_uint32_t         save_reg_args : 1;
+#endif
+
     /* varargs state offsets */
     jit_int32_t                 vaoff;         /* offset of jit_va_list */
     jit_int32_t                 vagp;          /* first gp va argument */
@@ -509,6 +580,13 @@ struct jit_compiler {
     jit_int32_t                  rout;         /* first output register */
     jit_int32_t                  breg;         /* base register for prolog/epilog */
 #endif
+#if __mips__
+    struct {
+       jit_int32_t       op;           /* pending instruction, candidate
+                                        * to be inserted in a delay slot */
+       jit_bool_t        pend;         /* non zero if need to emit op */
+    } inst;
+#endif
 #if __mips__ || __ia64__ || __alpha__ || \
        (__sparc__ && __WORDSIZE == 64) || __riscv || __loongarch__
     jit_int32_t                  carry;
@@ -528,11 +606,14 @@ struct jit_compiler {
 #endif
     jit_uint32_t         no_data : 1;
     jit_uint32_t         no_note : 1;
+    jit_int32_t                  framesize;    /* space for callee save registers,
+                                        * frame pointer and return address */
     jit_int32_t                  reglen;       /* number of registers */
     jit_regset_t         regarg;       /* cannot allocate */
     jit_regset_t         regsav;       /* automatic spill only once */
     jit_regset_t         reglive;      /* known live registers at some point */
     jit_regset_t         regmask;      /* register mask to update reglive */
+    jit_regset_t         explive;      /* explicitly marked as live */
     struct {
        jit_uint8_t      *end;
     } code;
@@ -657,6 +738,8 @@ struct jit_state {
     struct {
        jit_uint8_t     *ptr;
        jit_word_t       length;
+        /* PROTECTED bytes starting at PTR are mprotect'd. */
+        jit_word_t       protected;
     } code;
     struct {
        jit_uint8_t     *ptr;
index ad3f76f..bf59c5b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index a28b0dd..d51cfec 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -65,4 +65,13 @@ typedef enum {
 #define JIT_NOREG              _NOREG
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t flogr         : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_s390_h */
index e5988e1..ec21be9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -99,4 +99,13 @@ typedef enum {
     _NOREG,
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t lzcnt         : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_sparc_h */
index 91f9124..4c48013 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -189,6 +189,10 @@ typedef struct {
     jit_uint32_t avx           : 1;
     /* lahf/sahf available in 64 bits mode */
     jit_uint32_t lahf          : 1;
+    /* lzcnt and tzcnt? */
+    jit_uint32_t abm           : 1;
+    /* adcx and adox instructions available? */
+    jit_uint32_t adx           : 1;
 } jit_cpu_t;
 
 /*
index a30e7fd..44ac4f2 100644 (file)
 AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include  \
        -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 liblightning_LTLIBRARIES = liblightning.la
-liblightning_la_LDFLAGS = -version-info 1:0:0
+liblightning_la_LDFLAGS = -version-info 2:0:0
 
+AM_CPPFLAGS =
 if get_jit_size
 JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
-AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+AM_CPPFLAGS += -DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+endif
+if strong_type_checking
+AM_CPPFLAGS += -DSTRONG_TYPE_CHECKING=1
 endif
 
 liblightningdir = $(libdir)
 liblightning_la_SOURCES =      \
        jit_disasm.c            \
        jit_memory.c            \
-       jit_names.c             \
        jit_note.c              \
        jit_print.c             \
        jit_size.c              \
        lightning.c
 
 EXTRA_DIST =                   \
+       jit_names.c             \
        jit_fallback.c          \
        jit_rewind.c            \
+       aarch64-logical-immediates.c    \
        jit_aarch64.c           \
        jit_aarch64-cpu.c       \
        jit_aarch64-fpu.c       \
diff --git a/deps/lightning/lib/aarch64-logical-immediates.c b/deps/lightning/lib/aarch64-logical-immediates.c
new file mode 100644 (file)
index 0000000..c1e1ab0
--- /dev/null
@@ -0,0 +1,161 @@
+// AArch64 Logical Immediate Encoding and Decoding
+//
+// I hereby place this code in the public domain, as per the terms of the
+// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/
+
+#include <stdint.h>
+#include <stdbool.h>
+
+static inline int nonzeroCountTrailingZeros64(uint64_t n) {
+    return __builtin_ctzll(n); // n must be non-zero: the builtin's result is undefined for 0
+}
+
+static inline int countTrailingZeros64(uint64_t n) {
+    return n ? nonzeroCountTrailingZeros64(n) : 64; // defined for every n; yields 64 when n == 0
+}
+
+static inline int nonzeroCountLeadingZeros64(uint64_t n) {
+    return __builtin_clzll(n); // n must be non-zero: the builtin's result is undefined for 0
+}
+
+static inline int nonzeroCountLeadingZeros32(uint32_t n) {
+    return __builtin_clz(n); // n must be non-zero: the builtin's result is undefined for 0
+}
+
+static inline uint64_t rotateRight64(uint64_t v, int n) {
+    // Stands in for __builtin_rotateright64(v, n); both shift counts are masked to 0..63, so n == 0 needs no special case.
+    return (v >> (n & 63)) | (v << (-n & 63));
+}
+
+static inline uint64_t clearTrailingOnes64(uint64_t n) {
+    return n & (n + 1); // n + 1 carries through the low run of ones; the AND then clears that run
+}
+
+#define ENCODE_FAILED (-1)
+
+int encodeLogicalImmediate64(uint64_t val) { // returns N:immr:imms packed in 13 bits, or ENCODE_FAILED
+    // Consider an ARM64 logical immediate as a pattern of "o" ones preceded
+    // by "z" more-significant zeroes, repeated to fill a 64-bit integer.
+    // o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This
+    // part of the pattern is encoded in the fields "imms" and "N".
+    //
+    // "immr" encodes a further right rotate of the repeated pattern, allowing
+    // a wide range of useful bitwise constants to be represented.
+    //
+    // (The spec describes the "immr" rotate as rotating the "o + z" bit
+    // pattern before repeating it to fill 64-bits, but, as it's a repeating
+    // pattern, rotating afterwards is equivalent.)
+
+    // This encoding is not allowed to represent all-zero or all-one values.
+    if (val == 0 || ~val == 0)
+        return ENCODE_FAILED;
+
+    // To detect an immediate that may be encoded in this scheme, we first
+    // remove the right-rotate, by rotating such that the least significant
+    // bit is a one and the most significant bit is a zero.
+    //
+    // We do this by clearing any trailing one bits, then counting the
+    // trailing zeroes. This finds an "edge", where zero goes to one.
+    // We then rotate the original value right by that amount, moving
+    // the first one to the least significant bit.
+
+    int rotation = countTrailingZeros64(clearTrailingOnes64(val));
+    uint64_t normalized = rotateRight64(val, rotation & 63);
+
+    // Now we have normalized the value, and determined the rotation, we can
+    // determine "z" by counting the leading zeroes, and "o" by counting the
+    // trailing ones. (These will both be positive, as we already rejected 0
+    // and ~0, and rotated the value to start with a zero and end with a one.)
+
+    int zeroes = nonzeroCountLeadingZeros64(normalized);
+    int ones = nonzeroCountTrailingZeros64(~normalized);
+    int size = zeroes + ones;
+
+    // Detect the repeating pattern (by comparing every repetition to the
+    // one next to it, using rotate).
+
+    if (rotateRight64(val, size & 63) != val)
+        return ENCODE_FAILED;
+
+    // We do not need to further validate size to ensure it is a power of two
+    // between 2 and 64. The only "minimal" patterns that can repeat to fill a
+    // 64-bit value must have a length that is a factor of 64 (i.e. it is a
+    // power of two in the range [1,64]). And our pattern cannot be of length
+    // one (as we already rejected 0 and ~0).
+    //
+    // By "minimal" patterns I refer to patterns which do not themselves
+    // contain repetitions. For example, '010101' is a non-minimal pattern of
+    // a non-power-of-two length that can pass the above rotational test. It
+    // consists of the minimal pattern '01'. All our patterns are minimal, as
+    // they contain only one contiguous run of ones separated by at least one
+    // zero.
+
+    // Finally, we encode the values. "rotation" is the amount we rotated
+    // right by to "undo" the right-rotate encoded in immr, so must be
+    // negated.
+
+    // size 2:  N=0 immr=00000r imms=11110s
+    // size 4:  N=0 immr=0000rr imms=1110ss
+    // size 8:  N=0 immr=000rrr imms=110sss
+    // size 16: N=0 immr=00rrrr imms=10ssss
+    // size 32: N=0 immr=0rrrrr imms=0sssss
+    // size 64: N=1 immr=rrrrrr imms=ssssss
+    int immr = -rotation & (size - 1);
+    int imms = -(size << 1) | (ones - 1); // high bits mark the size as in the table; masked to 6 bits below
+    int N = (size >> 6); // size is at most 64, so this is 1 only for size 64
+
+    return (N << 12) | (immr << 6) | (imms & 0x3f); // bit 12 = N, bits 11..6 = immr, bits 5..0 = imms
+}
+
+int encodeLogicalImmediate32(uint32_t val) {
+    return encodeLogicalImmediate64(((uint64_t)val << 32) | val); // copy val into both halves and reuse the 64-bit encoder
+}
+
+// Decoding!
+
+bool isValidLogicalImmediate64(unsigned val) { // val uses the layout produced by encodeLogicalImmediate64
+    unsigned N = (val >> 12) & 1;
+    unsigned imms = val & 0x3f;
+    unsigned pattern = (N << 6) | (~imms & 0x3f); // N followed by the inverted imms bits
+    return (pattern & (pattern - 1)) != 0; // rejected when pattern is zero or has a single bit set
+}
+
+bool isValidLogicalImmediate32(unsigned val) {
+    unsigned N = (val >> 12) & 1;
+    return N == 0 && isValidLogicalImmediate64(val); // same check as the 64-bit form, additionally requiring N == 0
+}
+
+#define DECODE_FAILED 0
+
+// Expands an N:immr:imms encoding to its 64-bit value; returns DECODE_FAILED (zero) if the encoding is invalid
+uint64_t decodeLogicalImmediate64(unsigned val) {
+    // Fun way to generate the immediates with mask ^ (mask << S)
+    static const uint64_t mask_lookup[] = {
+        0xffffffffffffffff, // size = 64
+        0x00000000ffffffff, // size = 32
+        0x0000ffff0000ffff, // size = 16
+        0x00ff00ff00ff00ff, // size = 8
+        0x0f0f0f0f0f0f0f0f, // size = 4
+        0x3333333333333333, // size = 2
+    };
+
+    unsigned N = (val >> 12) & 1;
+    int immr = (val >> 6) & 0x3f;
+    unsigned imms = val & 0x3f;
+
+    unsigned pattern = (N << 6) | (~imms & 0x3f);
+
+    if (!(pattern & (pattern - 1))) return DECODE_FAILED; // same test as isValidLogicalImmediate64
+
+    int leading_zeroes = nonzeroCountLeadingZeros32(pattern); // 25..30: pattern fits in 7 bits and is at least 3 here
+    unsigned imms_mask = 0x7fffffff >> leading_zeroes; // 0x3f for size 64 down to 0x01 for size 2
+    uint64_t mask = mask_lookup[leading_zeroes - 25]; // index 0..5, matching the table order above
+    unsigned S = (imms + 1) & imms_mask; // shift applied to mask below; imms_mask drops the size-marker bits of imms
+    return rotateRight64(mask ^ (mask << S), immr);
+}
+
+uint32_t decodeLogicalImmediate32(unsigned val) {
+    unsigned N = (val >> 12) & 1;
+    if (N) return DECODE_FAILED; // encodings with N set are rejected for the 32-bit form
+    return (uint32_t)decodeLogicalImmediate64(val); // keep the low 32 bits of the 64-bit expansion
+}
index 35ddabf..d5e64ad 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -210,7 +210,7 @@ typedef union {
     jit_int32_t                w;
 #  undef ui
 } instr_t;
-#  define stack_framesize              160
+#  define s26_p(d)                     ((d) >= -33554432 && (d) <= 33554431)
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  define ldr(r0,r1)                   ldr_l(r0,r1)
 #  define ldxr(r0,r1,r2)               ldxr_l(r0,r1,r2)
@@ -349,6 +349,9 @@ typedef union {
 #  define A64_ORR                      0x2a000000
 #  define A64_MOV                      0x2a0003e0      /* AKA orr Rd,xzr,Rm */
 #  define A64_MVN                      0x2a2003e0
+#  define A64_CLS                      0x5ac01400
+#  define A64_CLZ                      0x5ac01000
+#  define A64_RBIT                     0x5ac00000
 #  define A64_UXTW                     0x2a0003e0      /* AKA MOV */
 #  define A64_EOR                      0x4a000000
 #  define A64_ANDS                     0x6a000000
@@ -370,6 +373,9 @@ typedef union {
 #  define MOV(Rd,Rm)                   ox_x(A64_MOV|XS,Rd,Rm)
 #  define MVN(Rd,Rm)                   ox_x(A64_MVN|XS,Rd,Rm)
 #  define NEG(Rd,Rm)                   ox_x(A64_NEG|XS,Rd,Rm)
+#  define CLS(Rd,Rm)                   o_xx(A64_CLS|XS,Rd,Rm)
+#  define CLZ(Rd,Rm)                   o_xx(A64_CLZ|XS,Rd,Rm)
+#  define RBIT(Rd,Rm)                  o_xx(A64_RBIT|XS,Rd,Rm)
 #  define MOVN(Rd,Imm16)               ox_h(A64_MOVN|XS,Rd,Imm16)
 #  define MOVN_16(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
 #  define MOVN_32(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
@@ -584,6 +590,14 @@ static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  MVN(r0,r1)
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 CLZ(r0,r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define andr(r0,r1,r2)               AND(r0,r1,r2)
 #  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -781,12 +795,12 @@ _bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
 #  define bmci(i0,r0,i1)               bmxi(BCC_EQ,i0,r0,i1)
 #  define jmpr(r0)                     BR(r0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    BLR(r0)
 #  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
@@ -802,36 +816,17 @@ static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
 #endif
 
 #if CODE
+/* https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ */
+#include "aarch64-logical-immediates.c"
 static jit_int32_t
 logical_immediate(jit_word_t imm)
 {
-    /* There are 5334 possible immediate values, but to avoid the
-     * need of either too complex code or large lookup tables,
-     * only check for (simply) encodable common/small values */
-    switch (imm) {
-       case -16:       return (0xf3b);
-       case -15:       return (0xf3c);
-       case -13:       return (0xf3d);
-       case -9:        return (0xf3e);
-       case -8:        return (0xf7c);
-       case -7:        return (0xf7d);
-       case -5:        return (0xf7e);
-       case -4:        return (0xfbd);
-       case -3:        return (0xfbe);
-       case -2:        return (0xffe);
-       case 1:         return (0x000);
-       case 2:         return (0xfc0);
-       case 3:         return (0x001);
-       case 4:         return (0xf80);
-       case 6:         return (0xfc1);
-       case 7:         return (0x002);
-       case 8:         return (0xf40);
-       case 12:        return (0xf81);
-       case 14:        return (0xfc2);
-       case 15:        return (0x003);
-       case 16:        return (0xf00);
-       default:        return (-1);
+    jit_int32_t                result = encodeLogicalImmediate64(imm);	/* replaces the fixed lookup table with the general encoder */
+    if (result != ENCODE_FAILED) {	/* ENCODE_FAILED is defined outside this excerpt */
+       assert(isValidLogicalImmediate64(result));	/* sanity check of the encoder output */
+       return (result & 0xfff);	/* keeps bits 0-11 (immr:imms); NOTE(review): bit 12 (N) of the encoding is dropped here -- confirm the consuming opcodes account for N */
     }
+    return (-1);	/* same "not encodable" result as the old default case */
 }
 
 static void
@@ -912,7 +907,7 @@ static void
 _o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
 {
     instr_t    i;
-    assert(Simm26 >= -33554432 && Simm26 <= 33554431);
+    assert(s26_p(Simm26));
     assert(!(Op   & ~0xfc000000));
     i.w = Op;
     i.imm26.b = Simm26;
@@ -1398,6 +1393,27 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        CSEL(r0, r0, r1, CC_EQ);
 }
 
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);	/* r0 = complement of r1 (comr expands to MVN) */
+    clzr(r0, r0);	/* leading zeros of the complement, i.e. leading ones of r1 (clzr expands to CLZ) */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    RBIT(r0, r1);	/* r0 = r1 with its bit order reversed */
+    clor(r0, r0);	/* leading ones of the reversed value, i.e. trailing ones of r1 */
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    RBIT(r0, r1);	/* r0 = r1 with its bit order reversed */
+    clzr(r0, r0);	/* leading zeros of the reversed value, i.e. trailing zeros of r1 */
+}
+
 static void
 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -1850,7 +1866,7 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     retry = _jit->pc.w;
     LDAXR(r0, r1);
     eqr(r0, r0, r2);
-    jump0 = beqi(_jit->pc.w r0, 0);    /* beqi done r0 0 */
+    jump0 = beqi(_jit->pc.w, r0, 0);   /* beqi done r0 0 */
     STLXR(r3, r0, r1);
     jump1 = bnei(_jit->pc.w, r0, 0);   /* bnei retry r0 0 */
     /* done: */
@@ -2166,20 +2182,22 @@ _bmxi(jit_state_t *_jit, jit_int32_t cc,
     return (w);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
     jit_int32_t                reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (w >= -33554432 && w <= 33554431)
-       B(w);
+    jit_word_t         d, w;	/* d: branch displacement, w: code address recorded before emitting */
+    w = _jit->pc.w;	/* address of the first instruction emitted for this jump */
+    d = (i0 - w) >> 2;	/* distance to the target, shifted right by 2 */
+    if (s26_p(d))	/* d is within the -33554432..33554431 range checked by s26_p */
+       B(d);
     else {
        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
        movi(rn(reg), i0);
        jmpr(rn(reg));
        jit_unget_reg(reg);
     }
+    return (w);	/* the function now returns the address recorded above */
 }
 
 static jit_word_t
@@ -2194,20 +2212,22 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
     jit_int32_t                reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (w >= -33554432 && w <= 33554431)
-       BL(w);
+    jit_word_t         d, w;	/* d: call displacement, w: code address recorded before emitting */
+    w = _jit->pc.w;	/* address of the first instruction emitted for this call */
+    d = (i0 - w) >> 2;	/* distance to the target, shifted right by 2 */
+    if (s26_p(d))	/* d is within the -33554432..33554431 range checked by s26_p */
+       BL(d);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        callr(rn(reg));
        jit_unget_reg(reg);
     }
+    return (w);	/* the function now returns the address recorded above */
 }
 
 static jit_word_t
@@ -2222,20 +2242,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
-/*
- * prolog and epilog not as "optimized" as one would like, but the
- * problem of overallocating stack space to save callee save registers
- * exists on all ports, and is still a todo to use a variable
- *     stack_framesize
- * value, what would cause needing to patch some calls, most likely
- * the offset of jit_arg* of stack arguments.
- */
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, rreg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -2246,40 +2259,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
-    STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
-    MOV_XSP(FP_REGNO, SP_REGNO);
-#define SPILL(L, R, O)                                                 \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
-           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
-               STPI(L, R, SP_REGNO, O);                                \
-           else                                                        \
-               STRI(L, SP_REGNO, O);                                   \
-       }                                                               \
-       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
-           STRI(R, SP_REGNO, O + 1);                                   \
-    } while (0)
-    SPILL(19, 20,  2);
-    SPILL(21, 22,  4);
-    SPILL(23, 24,  6);
-    SPILL(25, 26,  8);
-    SPILL(27, 28, 10);
-#undef SPILL
-#define SPILL(R, O)                                                    \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
-               stxi_d(O, SP_REGNO, R);                                 \
-    } while (0)
-    SPILL( 8,  96);
-    SPILL( 9, 104);
-    SPILL(10, 112);
-    SPILL(11, 120);
-    SPILL(12, 128);
-    SPILL(13, 136);
-    SPILL(14, 144);
-    SPILL(15, 152);
-#undef SPILL
-    if (_jitc->function->stack)
+
+    if (!_jitc->function->need_frame) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               jit_check_frame();
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame) {
+       STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(jit_framesize() >> 3));
+       MOV_XSP(FP_REGNO, SP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = 2; reg < jit_size(iregs);) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {
+               if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
+                   break;
+           }
+           if (rreg < jit_size(iregs)) {
+               STPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);
+               offs += 2;
+               reg = rreg + 1;
+           }
+           else {
+               STRI(rn(iregs[reg]), SP_REGNO, offs);
+               ++offs;
+               /* No pair found */
+               break;
+           }
+       }
+       else
+           ++reg;
+    }
+    for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+  if (_jitc->function->stack)
        subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
@@ -2288,6 +2312,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
        jit_unget_reg(reg);
     }
 
+#if !__APPLE__
     if (_jitc->function->self.call & jit_call_varargs) {
        /* Save gp registers in the save area, if any is a vararg */
        for (reg = 8 - _jitc->function->vagp / -8;
@@ -2305,53 +2330,55 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
                   reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
     }
+#endif
 }
 
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, rreg, offs;	/* reg/rreg: indexes into iregs or fregs, offs: running save-area offset */
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->stack)
        MOV_XSP(SP_REGNO, FP_REGNO);
-#define LOAD(L, R, O)                                                  \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
-           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
-               LDPI(L, R, SP_REGNO, O);                                \
-           else                                                        \
-               LDRI(L, SP_REGNO, O);                                   \
-       }                                                               \
-       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
-           LDRI(R, SP_REGNO, O + 1);                                   \
-    } while (0)
-    LOAD(19, 20,  2);
-    LOAD(21, 22,  4);
-    LOAD(23, 24,  6);
-    LOAD(25, 26,  8);
-    LOAD(27, 28, 10);
-#undef LOAD
-#define LOAD(R, O)                                                     \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
-               ldxi_d(R, SP_REGNO, O);                                 \
-    } while (0)
-    LOAD( 8,  96);
-    LOAD( 9, 104);
-    LOAD(10, 112);
-    LOAD(11, 120);
-    LOAD(12, 128);
-    LOAD(13, 136);
-    LOAD(14, 144);
-    LOAD(15, 152);
-#undef LOAD
-    LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
+    /* callee save registers: reload every register flagged in the function regset, in pairs where possible */
+    for (reg = 0, offs = 2; reg < jit_size(iregs);) {	/* offs starts at 2 and is in 8-byte units (it is shifted left by 3 before the byte-offset loads below) */
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {	/* look for the next flagged register to pair with */
+               if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
+                   break;
+           }
+           if (rreg < jit_size(iregs)) {
+               LDPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);	/* both registers loaded with one pair load */
+               offs += 2;
+               reg = rreg + 1;	/* resume scanning after the partner */
+           }
+           else {
+               LDRI(rn(iregs[reg]), SP_REGNO, offs);	/* last flagged register, loaded on its own */
+               ++offs;
+               /* No pair found */
+               break;
+           }
+       }
+       else
+           ++reg;
+    }
+    for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {	/* offs converted to bytes; FP registers follow the integer ones */
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)	/* FP/LR are reloaded only when need_frame is set */
+       LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, jit_framesize() >> 3);
     RET();
 }
 
 static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
+#if !__APPLE__
     jit_int32_t                reg;
 
     assert(_jitc->function->self.call & jit_call_varargs);
@@ -2362,7 +2389,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     reg = jit_get_reg(jit_class_gpr);
 
     /* Initialize stack pointer to the first stack argument. */
-    addi(rn(reg), FP_REGNO, _jitc->function->self.size);
+    addi(rn(reg), FP_REGNO, jit_selfsize());
     stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
 
     /* Initialize gp top pointer to the first stack argument. */
@@ -2382,11 +2409,16 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
 
     jit_unget_reg(reg);
+#else
+    assert(_jitc->function->self.call & jit_call_varargs);
+    addi(r0, FP_REGNO, jit_selfsize());
+#endif
 }
 
 static void
 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if !__APPLE__
     jit_word_t         ge_code;
     jit_word_t         lt_code;
     jit_int32_t                rg0, rg1;
@@ -2416,7 +2448,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    lt_code = jmpi_p(_jit->pc.w);
+    lt_code = jmpi(_jit->pc.w);
 
     /* Where to land if argument is in overflow area. */
     patch_at(ge_code, _jit->pc.w);
@@ -2435,6 +2467,11 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
+#else
+    assert(_jitc->function->self.call & jit_call_varargs);
+    ldr(r0, r1);
+    addi(r1, r1, sizeof(jit_word_t));
+#endif
 }
 
 static void
@@ -2454,7 +2491,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
     ffc = i.w & 0xffc00000;
     if (fc == A64_B || fc == A64_BL) {
        d = (label - instr) >> 2;
-       assert(d >= -33554432 && d <= 33554431);
+       assert(s26_p(d));
        i.imm26.b = d;
        u.i[0] = i.w;
     }
index 7c40539..3d17e32 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -862,6 +862,7 @@ dbopi(ltgt)
 static void
 _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if !__APPLE__
     jit_word_t         ge_code;
     jit_word_t         lt_code;
     jit_int32_t                rg0, rg1;
@@ -891,7 +892,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    lt_code = jmpi_p(_jit->pc.w);
+    lt_code = jmpi(_jit->pc.w);
 
     /* Where to land if argument is in overflow area. */
     patch_at(ge_code, _jit->pc.w);
@@ -910,5 +911,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
+#else
+    assert(_jitc->function->self.call & jit_call_varargs);
+    ldr_d(r0, r1);
+    addi(r1, r1, sizeof(jit_float64_t));
+#endif
 }
 #endif
index b1f451f..3d1ea99 100644 (file)
@@ -1,20 +1,25 @@
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 120
+#  if PACKED_STACK
+#define JIT_INSTR_MAX 96
     0, /* data */
     0, /* live */
-    4, /* align */
+    12,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    120,       /* prolog */
+    96,        /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    44,        /* va_start */
-    64,        /* va_arg */
-    72,        /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    12,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     20,        /* addi */
     16,        /* movi */
     8, /* movnr */
     8, /* movzr */
+    28,        /* casr */
+    36,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
-    12,        /* ldi_c */
+    16,        /* ldi_c */
     4, /* ldr_uc */
-    12,        /* ldi_uc */
+    16,        /* ldi_uc */
     4, /* ldr_s */
-    12,        /* ldi_s */
+    16,        /* ldi_s */
     4, /* ldr_us */
-    12,        /* ldi_us */
+    16,        /* ldi_us */
     4, /* ldr_i */
-    12,        /* ldi_i */
+    16,        /* ldi_i */
     4, /* ldr_ui */
-    12,        /* ldi_ui */
+    16,        /* ldi_ui */
     4, /* ldr_l */
-    12,        /* ldi_l */
+    16,        /* ldi_l */
     8, /* ldxr_c */
     20,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_l */
     20,        /* ldxi_l */
     4, /* str_c */
-    12,        /* sti_c */
+    16,        /* sti_c */
     4, /* str_s */
-    12,        /* sti_s */
+    16,        /* sti_s */
     4, /* str_i */
-    12,        /* sti_i */
+    16,        /* sti_i */
     4, /* str_l */
-    12,        /* sti_l */
+    16,        /* sti_l */
     4, /* stxr_c */
     20,        /* stxi_c */
     4, /* stxr_s */
     8, /* bxsubr_u */
     8, /* bxsubi_u */
     4, /* jmpr */
-    20,        /* jmpi */
+    4, /* jmpi */
     4, /* callr */
-    20,        /* calli */
+    16,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* movr_f */
     8, /* movi_f */
     8, /* ldr_f */
-    16,        /* ldi_f */
+    20,        /* ldi_f */
     8, /* ldxr_f */
     24,        /* ldxi_f */
     8, /* str_f */
-    16,        /* sti_f */
+    20,        /* sti_f */
     8, /* stxr_f */
     24,        /* stxi_f */
     8, /* bltr_f */
     4, /* movr_d */
     12,        /* movi_d */
     8, /* ldr_d */
-    16,        /* ldi_d */
+    20,        /* ldi_d */
     8, /* ldxr_d */
     24,        /* ldxi_d */
     8, /* str_d */
-    16,        /* sti_d */
+    20,        /* sti_d */
     8, /* stxr_d */
     24,        /* stxi_d */
     8, /* bltr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    8, /* clo */
+    4, /* clz */
+    12, /* cto */
+    8, /* ctz */
+
+#  else        /* PACKED_STACK */
+#define JIT_INSTR_MAX 120
+    0, /* data */
+    0, /* live */
+    12,        /* align */
+    0, /* save */
+    0, /* load */
+    4, /* skip */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    120,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    12,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    20,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    20,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    24,        /* rsbi */
+    4, /* mulr */
+    20,        /* muli */
+    12,        /* qmulr */
+    20,        /* qmuli */
+    12,        /* qmulr_u */
+    20,        /* qmuli_u */
+    4, /* divr */
+    20,        /* divi */
+    4, /* divr_u */
+    12,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    28,        /* remi */
+    12,        /* remr_u */
+    20,        /* remi_u */
+    4, /* andr */
+    20,        /* andi */
+    4, /* orr */
+    20,        /* ori */
+    4, /* xorr */
+    20,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    8, /* ltr */
+    8, /* lti */
+    8, /* ltr_u */
+    8, /* lti_u */
+    8, /* ler */
+    8, /* lei */
+    8, /* ler_u */
+    8, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
+    8, /* ger */
+    8, /* gei */
+    8, /* ger_u */
+    8, /* gei_u */
+    8, /* gtr */
+    8, /* gti */
+    8, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    16,        /* movi */
+    8, /* movnr */
+    8, /* movzr */
+    28,        /* casr */
+    36,        /* casi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
     8, /* bswapr_us */
     8, /* bswapr_ui */
     4, /* bswapr_ul */
-    28,        /* casr */
-    36,        /* casi */
+    8, /* htonr_us */
+    8, /* htonr_ui */
+    4, /* htonr_ul */
+    4, /* ldr_c */
+    16,        /* ldi_c */
+    4, /* ldr_uc */
+    16,        /* ldi_uc */
+    4, /* ldr_s */
+    16,        /* ldi_s */
+    4, /* ldr_us */
+    16,        /* ldi_us */
+    4, /* ldr_i */
+    16,        /* ldi_i */
+    4, /* ldr_ui */
+    16,        /* ldi_ui */
+    4, /* ldr_l */
+    16,        /* ldi_l */
+    8, /* ldxr_c */
+    20,        /* ldxi_c */
+    4, /* ldxr_uc */
+    20,        /* ldxi_uc */
+    4, /* ldxr_s */
+    16,        /* ldxi_s */
+    4, /* ldxr_us */
+    16,        /* ldxi_us */
+    4, /* ldxr_i */
+    20,        /* ldxi_i */
+    4, /* ldxr_ui */
+    16,        /* ldxi_ui */
+    4, /* ldxr_l */
+    20,        /* ldxi_l */
+    4, /* str_c */
+    16,        /* sti_c */
+    4, /* str_s */
+    16,        /* sti_s */
+    4, /* str_i */
+    16,        /* sti_i */
+    4, /* str_l */
+    16,        /* sti_l */
+    4, /* stxr_c */
+    20,        /* stxi_c */
+    4, /* stxr_s */
+    20,        /* stxi_s */
+    4, /* stxr_i */
+    20,        /* stxi_i */
+    4, /* stxr_l */
+    20,        /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    8, /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    8, /* blei_u */
+    8, /* beqr */
+    24,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    24,        /* bnei */
+    8, /* bmsr */
+    8, /* bmsi */
+    8, /* bmcr */
+    8, /* bmci */
+    8, /* boaddr */
+    8, /* boaddi */
+    8, /* boaddr_u */
+    8, /* boaddi_u */
+    8, /* bxaddr */
+    8, /* bxaddi */
+    8, /* bxaddr_u */
+    8, /* bxaddi_u */
+    8, /* bosubr */
+    8, /* bosubi */
+    8, /* bosubr_u */
+    8, /* bosubi_u */
+    8, /* bxsubr */
+    8, /* bxsubi */
+    8, /* bxsubr_u */
+    8, /* bxsubi_u */
+    4, /* jmpr */
+    4, /* jmpi */
+    4, /* callr */
+    16,        /* calli */
+    0, /* prepare */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    96,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    12,        /* addi_f */
+    4, /* subr_f */
+    12,        /* subi_f */
+    12,        /* rsbi_f */
+    4, /* mulr_f */
+    12,        /* muli_f */
+    4, /* divr_f */
+    12,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    8, /* ltr_f */
+    16,        /* lti_f */
+    8, /* ler_f */
+    16,        /* lei_f */
+    8, /* eqr_f */
+    16,        /* eqi_f */
+    8, /* ger_f */
+    16,        /* gei_f */
+    8, /* gtr_f */
+    16,        /* gti_f */
+    8, /* ner_f */
+    16,        /* nei_f */
+    8, /* unltr_f */
+    16,        /* unlti_f */
+    8, /* unler_f */
+    16,        /* unlei_f */
+    16,        /* uneqr_f */
+    24,        /* uneqi_f */
+    8, /* unger_f */
+    16,        /* ungei_f */
+    8, /* ungtr_f */
+    16,        /* ungti_f */
+    16,        /* ltgtr_f */
+    24,        /* ltgti_f */
+    8, /* ordr_f */
+    16,        /* ordi_f */
+    8, /* unordr_f */
+    16,        /* unordi_f */
+    8, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    8, /* movi_f */
+    8, /* ldr_f */
+    20,        /* ldi_f */
+    8, /* ldxr_f */
+    24,        /* ldxi_f */
+    8, /* str_f */
+    20,        /* sti_f */
+    8, /* stxr_f */
+    24,        /* stxi_f */
+    8, /* bltr_f */
+    16,        /* blti_f */
+    8, /* bler_f */
+    16,        /* blei_f */
+    8, /* beqr_f */
+    16,        /* beqi_f */
+    8, /* bger_f */
+    16,        /* bgei_f */
+    8, /* bgtr_f */
+    16,        /* bgti_f */
+    8, /* bner_f */
+    16,        /* bnei_f */
+    8, /* bunltr_f */
+    16,        /* bunlti_f */
+    8, /* bunler_f */
+    16,        /* bunlei_f */
+    16,        /* buneqr_f */
+    24,        /* buneqi_f */
+    8, /* bunger_f */
+    16,        /* bungei_f */
+    8, /* bungtr_f */
+    16,        /* bungti_f */
+    16,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    8, /* bordr_f */
+    16,        /* bordi_f */
+    8, /* bunordr_f */
+    16,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    12,        /* addi_d */
+    4, /* subr_d */
+    12,        /* subi_d */
+    12,        /* rsbi_d */
+    4, /* mulr_d */
+    12,        /* muli_d */
+    4, /* divr_d */
+    12,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    8, /* ltr_d */
+    16,        /* lti_d */
+    8, /* ler_d */
+    16,        /* lei_d */
+    8, /* eqr_d */
+    16,        /* eqi_d */
+    8, /* ger_d */
+    16,        /* gei_d */
+    8, /* gtr_d */
+    16,        /* gti_d */
+    8, /* ner_d */
+    16,        /* nei_d */
+    8, /* unltr_d */
+    16,        /* unlti_d */
+    8, /* unler_d */
+    16,        /* unlei_d */
+    16,        /* uneqr_d */
+    24,        /* uneqi_d */
+    8, /* unger_d */
+    16,        /* ungei_d */
+    8, /* ungtr_d */
+    16,        /* ungti_d */
+    16,        /* ltgtr_d */
+    24,        /* ltgti_d */
+    8, /* ordr_d */
+    16,        /* ordi_d */
+    8, /* unordr_d */
+    16,        /* unordi_d */
+    8, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    12,        /* movi_d */
+    8, /* ldr_d */
+    20,        /* ldi_d */
+    8, /* ldxr_d */
+    24,        /* ldxi_d */
+    8, /* str_d */
+    20,        /* sti_d */
+    8, /* stxr_d */
+    24,        /* stxi_d */
+    8, /* bltr_d */
+    16,        /* blti_d */
+    8, /* bler_d */
+    16,        /* blei_d */
+    8, /* beqr_d */
+    20,        /* beqi_d */
+    8, /* bger_d */
+    16,        /* bgei_d */
+    8, /* bgtr_d */
+    16,        /* bgti_d */
+    8, /* bner_d */
+    16,        /* bnei_d */
+    8, /* bunltr_d */
+    16,        /* bunlti_d */
+    8, /* bunler_d */
+    16,        /* bunlei_d */
+    16,        /* buneqr_d */
+    24,        /* buneqi_d */
+    8, /* bunger_d */
+    16,        /* bungei_d */
+    8, /* bungtr_d */
+    16,        /* bungti_d */
+    16,        /* bltgtr_d */
+    24,        /* bltgti_d */
+    8, /* bordr_d */
+    16,        /* bordi_d */
+    8, /* bunordr_d */
+    16,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+    8, /* clo */
+    4, /* clz */
+    12, /* cto */
+    8, /* ctz */
+#  endif
 #endif /* __WORDSIZE */
index b54d007..243e677 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* callee save
+ * align16(lr+fp+x19+x2[0-8]+v8+v9+v1[0-5]) */
+#define stack_framesize                        160
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
+#if __APPLE__
+typedef jit_pointer_t jit_va_list_t;
+#else
 typedef struct jit_qreg {
     jit_float64_t      l;
     jit_float64_t      h;
@@ -52,10 +59,13 @@ typedef struct jit_va_list {
     jit_qreg_t         q6;
     jit_qreg_t         q7;
 } jit_va_list_t;
+#endif
 
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
@@ -72,7 +82,11 @@ extern void __clear_cache(void *, void *);
  */
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x08,                  "x8" },
+#if __APPLE__
+    { 0x12,                            "x18" },
+#else
     { rc(gpr) | 0x12,                  "x18" },
+#endif
     { rc(gpr) | 0x11,                  "x17" },
     { rc(gpr) | 0x10,                  "x16" },
     { rc(gpr) | 0x09,                  "x9" },
@@ -138,6 +152,14 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {        /* ten integer registers, _R19.._R28; presumably the callee-save GPRs counted in stack_framesize -- confirm */
+    _R19, _R20, _R21, _R22, _R23, _R24, _R25, _R26, _R27, _R28
+};
+
+static jit_int32_t fregs[] = {        /* eight float registers, _V8.._V15; presumably the callee-save FPRs counted in stack_framesize -- confirm */
+    _V8, _V9, _V10, _V11, _V12, _V13, _V14, _V15
+};
+
 /*
  * Implementation
  */
@@ -198,6 +220,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -258,20 +281,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)  /* code: node code supplied by the caller instead of the fixed retr */
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);    /* bracketed by jit_dec_synth() at the end of the function */
+    jit_movr(JIT_RET, u);     /* now unconditional: the u != JIT_RET test and the jit_live(JIT_RET) call are gone */
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -331,7 +352,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -341,6 +362,7 @@ void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
@@ -351,6 +373,7 @@ _jit_ellipsis(jit_state_t *_jit)
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
 
+#if !__APPLE_
        /* Allocate va_list like object in the stack,
         * with enough space to save all argument
         * registers, and use fixed offsets for them. */
@@ -367,6 +390,7 @@ _jit_ellipsis(jit_state_t *_jit)
            _jitc->function->vafp = (8 - _jitc->function->self.argf) * -16;
        else
            _jitc->function->vafp = 0;
+#endif
     }
     jit_dec_synth();
 }
@@ -380,7 +404,7 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
@@ -389,10 +413,22 @@ _jit_arg(jit_state_t *_jit)
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
+#if PACKED_STACK || STRONG_TYPE_CHECKING
+       assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+#if PACKED_STACK
+       _jitc->function->self.size +=
+           _jitc->function->self.size & ((1 << (code - jit_code_arg_c)) - 1);
+#endif
        offset = _jitc->function->self.size;
+#if PACKED_STACK
+       _jitc->function->self.size += 1 << (code - jit_code_arg_c);
+#else
        _jitc->function->self.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -408,8 +444,17 @@ _jit_arg_f(jit_state_t *_jit)
     if (jit_arg_f_reg_p(_jitc->function->self.argf))
        offset = _jitc->function->self.argf++;
     else {
+#if PACKED_STACK
+       _jitc->function->self.size +=
+           _jitc->function->self.size & (sizeof(jit_float32_t) - 1);
+#endif
        offset = _jitc->function->self.size;
+#if PACKED_STACK
+       _jitc->function->self.size += sizeof(jit_float32_t);
+#else
        _jitc->function->self.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -427,8 +472,13 @@ _jit_arg_d(jit_state_t *_jit)
     if (jit_arg_f_reg_p(_jitc->function->self.argf))
        offset = _jitc->function->self.argf++;
     else {
+#if PACKED_STACK
+       _jitc->function->self.size +=
+           _jitc->function->self.size & (sizeof(jit_float64_t) - 1);
+#endif
        offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
+       _jitc->function->self.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -439,111 +489,235 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c); /* replaces the exact v->code == jit_code_arg check */
     jit_inc_synth_wp(getarg_c, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {      /* argument arrived in a register */
+#if PACKED_STACK       /* PACKED_STACK builds copy the register as-is instead of calling jit_extr_c */
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_c(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c); /* the unsigned getter is checked against the same jit_code_arg_c as the signed one */
     jit_inc_synth_wp(getarg_uc, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {      /* argument arrived in a register */
+#if PACKED_STACK       /* PACKED_STACK builds copy the register as-is instead of calling jit_extr_uc */
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_uc(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s); /* replaces the exact v->code == jit_code_arg check */
     jit_inc_synth_wp(getarg_s, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {      /* argument arrived in a register */
+#if PACKED_STACK       /* PACKED_STACK builds copy the register as-is instead of calling jit_extr_s */
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_s(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s); /* the unsigned getter is checked against the same jit_code_arg_s as the signed one */
     jit_inc_synth_wp(getarg_us, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {      /* argument arrived in a register */
+#if PACKED_STACK       /* PACKED_STACK builds copy the register as-is instead of calling jit_extr_us */
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_us(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i); /* replaces the exact v->code == jit_code_arg check */
     jit_inc_synth_wp(getarg_i, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {      /* argument arrived in a register */
+#if PACKED_STACK || __WORDSIZE == 32   /* plain move when the stack is packed or a word is 32 bits; jit_extr_i otherwise */
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_i(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
 
+#if __WORDSIZE == 64   /* _jit_getarg_ui and _jit_getarg_l are only built for 64-bit words */
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i); /* the unsigned getter is checked against the same jit_code_arg_i as the signed one */
     jit_inc_synth_wp(getarg_ui, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {      /* argument arrived in a register */
+#if PACKED_STACK       /* PACKED_STACK builds copy the register as-is instead of calling jit_extr_ui */
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l); /* replaces the exact v->code == jit_code_arg check */
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
+    else {    /* argument lives at offset v->u.w from JIT_FP */
+       jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+       jit_link_alist(node);  /* record the FP-relative load; presumably so patch_alist() can rewrite it later -- confirm */
+    }
     jit_dec_synth();
 }
+#endif /* __WORDSIZE == 64 */
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr(JIT_RA0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_int32_t     regno = JIT_RA0 - v->u.w;
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_putargr_c:    jit_extr_c(regno, u);   break;
+           case jit_code_putargr_uc:   jit_extr_uc(regno, u);  break;
+           case jit_code_putargr_s:    jit_extr_s(regno, u);   break;
+           case jit_code_putargr_us:   jit_extr_us(regno, u);  break;
+#  if __WORDISZE == 32
+           case jit_code_putargr_i:    jit_movr(regno, u);     break;
+#  else
+           case jit_code_putargr_i:    jit_extr_i(regno, u);   break;
+           case jit_code_putargr_ui:   jit_extr_ui(regno, u);  break;
+           case jit_code_putargr_l:    jit_movr(regno, u);     break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       jit_movr(regno, u);
+#endif
+    }
+    else {
+       jit_node_t      *node;
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_putargr_c:    case jit_code_putargr_uc:
+               node = jit_stxi_c(v->u.w, JIT_FP, u);           break;
+           case jit_code_putargr_s:    case jit_code_putargr_us:
+               node = jit_stxi_s(v->u.w, JIT_FP, u);           break;
+#  if __WORDSIZE == 32
+           case jit_code_putargr_i:
+               node = jit_stxi(v->u.w, JIT_FP, u);             break;
+#  else
+           case jit_code_putargr_i:    case jit_code_putargr_ui:
+               node = jit_stxi_i(v->u.w, JIT_FP, u);           break;
+           case jit_code_putargr_l:
+               node = jit_stxi(v->u.w, JIT_FP, u);             break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       node = jit_stxi(v->u.w, JIT_FP, u);
+#endif
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
+#if PACKED_STACK
+    switch (code) {
+       case jit_code_putargi_c:        u = (jit_int8_t)u;      break;
+       case jit_code_putargi_uc:       u = (jit_uint8_t)u;     break;
+       case jit_code_putargi_s:        u = (jit_int16_t)u;     break;
+       case jit_code_putargi_us:       u = (jit_uint16_t)u;    break;
+#  if __WORDSIZE == 32
+       case jit_code_putargi_i:                                break;
+#  else
+       case jit_code_putargi_i:        u = (jit_int32_t)u;     break;
+       case jit_code_putargi_ui:       u = (jit_uint32_t)u;    break;
+       case jit_code_putargi_l:                                break;
+#  endif
+       default:                        abort();                break;
+    }
+#endif
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_putargi_c:    case jit_code_putargi_uc:
+               node = jit_stxi_c(v->u.w, JIT_FP, regno);       break;
+           case jit_code_putargi_s:    case jit_code_putargi_us:
+               node = jit_stxi_s(v->u.w, JIT_FP, regno);       break;
+#  if __WORDSIZE == 32
+           case jit_code_putargi_i:
+               node = jit_stxi(v->u.w, JIT_FP, regno);         break;
+#  else
+           case jit_code_putargi_i:    case jit_code_putargi_ui:
+               node = jit_stxi_i(v->u.w, JIT_FP, regno);       break;
+           case jit_code_putargi_l:
+               node = jit_stxi(v->u.w, JIT_FP, regno);         break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       node = jit_stxi(v->u.w, JIT_FP, regno);
+#endif
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -556,8 +730,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(getarg_f, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(u, JIT_FA0 - v->u.w);
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -568,8 +744,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(putargr_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_f(JIT_FA0 - v->u.w, u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -582,9 +760,11 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(JIT_FA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
+       node = jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -597,8 +777,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_d(u, JIT_FA0 - v->u.w);
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -609,8 +791,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(putargr_d, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(JIT_FA0 - v->u.w, u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -623,48 +807,161 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(JIT_FA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
+       node = jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+       jit_int32_t     regno = JIT_RA0 - _jitc->function->call.argi;
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_pushargr_c:   jit_extr_c(regno, u);   break;
+           case jit_code_pushargr_uc:  jit_extr_uc(regno, u);  break;
+           case jit_code_pushargr_s:   jit_extr_s(regno, u);   break;
+           case jit_code_pushargr_us:  jit_extr_us(regno, u);  break;
+#  if __WORDISZE == 32
+           case jit_code_pushargr_i:   jit_movr(regno, u);     break;
+#  else
+           case jit_code_pushargr_i:   jit_extr_i(regno, u);   break;
+           case jit_code_pushargr_ui:  jit_extr_ui(regno, u);  break;
+           case jit_code_pushargr_l:   jit_movr(regno, u);     break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       jit_movr(regno, u);
+#endif
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(code == jit_code_pushargr);
+           jit_stxi(_jitc->function->call.size, JIT_SP, u);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argi;
     }
     else {
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size &
+               ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1);
+       switch (code) {
+           case jit_code_pushargr_c:   case jit_code_pushargr_uc:
+               jit_stxi_c(_jitc->function->call.size, JIT_SP, u);
+               break;
+           case jit_code_pushargr_s:   case jit_code_pushargr_us:
+               jit_stxi_s(_jitc->function->call.size, JIT_SP, u);
+               break;
+#  if __WORDSIZE == 32
+           case jit_code_pushargr_i:
+               jit_stxi(_jitc->function->call.size, JIT_SP, u);
+               break;
+#  else
+           case jit_code_pushargr_i:   case jit_code_pushargr_ui:
+               jit_stxi_i(_jitc->function->call.size, JIT_SP, u);
+               break;
+           case jit_code_pushargr_l:
+               jit_stxi(_jitc->function->call.size, JIT_SP, u);
+               break;
+#  endif
+           default:
+               abort();
+               break;
+       }
+       _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2);
+#else
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)       /* code: one of the jit_code_pushargi* values handled below */
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
+#if PACKED_STACK       /* pass the immediate through the integer cast selected by code before it is used */
+    switch (code) {
+       case jit_code_pushargi_c:       u = (jit_int8_t)u;      break;
+       case jit_code_pushargi_uc:      u = (jit_uint8_t)u;     break;
+       case jit_code_pushargi_s:       u = (jit_int16_t)u;     break;
+       case jit_code_pushargi_us:      u = (jit_uint16_t)u;    break;
+#  if __WORDSIZE == 32
+       case jit_code_pushargi_i:                               break;
+#  else
+       case jit_code_pushargi_i:       u = (jit_int32_t)u;     break;
+       case jit_code_pushargi_ui:      u = (jit_uint32_t)u;    break;
+       case jit_code_pushargi_l:                               break;
+#  endif
+       default:                        abort();                break;
+    }
+#endif
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+       regno = JIT_RA0 - _jitc->function->call.argi;
+       jit_movi(regno, u);
+#if __APPLE__  /* varargs call: the argument register is also stored at call.size on the outgoing stack */
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(code == jit_code_pushargi);
+           jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argi;
     }
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
+#if PACKED_STACK       /* stack slot sized by the pusharg code rather than a full word */
+       _jitc->function->call.size +=
+           _jitc->function->call.size &
+               ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1);       /* base is jit_code_pushargr_c here too, same expression as _jit_pushargr */
+       switch (code) {
+           case jit_code_pushargi_c:   case jit_code_pushargi_uc:
+               jit_stxi_c(_jitc->function->call.size, JIT_SP, regno);
+               break;
+           case jit_code_pushargi_s:   case jit_code_pushargi_us:
+               jit_stxi_s(_jitc->function->call.size, JIT_SP, regno);
+               break;
+#  if __WORDSIZE == 32
+           case jit_code_pushargi_i:
+               jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+               break;
+#  else
+           case jit_code_pushargi_i:   case jit_code_pushargi_ui:
+               jit_stxi_i(_jitc->function->call.size, JIT_SP, regno);
+               break;
+           case jit_code_pushargi_l:
+               jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+               break;
+#  endif
+           default:
+               abort();
+               break;
+       }
+       _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2);
+#else
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_unget_reg(regno);  /* release moved below the #if/#else so it follows the store in both variants */
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -677,11 +974,27 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float32_t) == sizeof(jit_word_t));
+           jit_stxi_f(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float32_t) - 1);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_float32_t);
+#else
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -695,14 +1008,30 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float32_t) == sizeof(jit_word_t));
+           jit_stxi_f(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float32_t) - 1);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
+       _jitc->function->call.size += sizeof(jit_float32_t);
+#else
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -715,11 +1044,24 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float64_t) == sizeof(jit_word_t));
+           jit_stxi_d(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float64_t) - 1);
+#endif
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -733,14 +1075,27 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float64_t) == sizeof(jit_word_t));
+           jit_stxi_d(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float64_t) - 1);
+#endif
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -769,7 +1124,12 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
+#if PACKED_STACK
+    _jitc->function->call.size +=
+       _jitc->function->call.size & (sizeof(jit_word_t) - 1);
+#endif
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     node = jit_callr(r0);
@@ -786,7 +1146,12 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
+#if PACKED_STACK
+    _jitc->function->call.size +=
+       _jitc->function->call.size & (sizeof(jit_word_t) - 1);
+#endif
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     node = jit_calli(i0);
@@ -835,10 +1200,15 @@ void
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_i, r0);
+#if __WORDSIZE == 32
+    jit_movr(r0, JIT_RET);
+#else
     jit_extr_i(r0, JIT_RET);
+#endif
     jit_dec_synth();
 }
 
+#if __WORDSIZE == 64
 void
 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -851,10 +1221,10 @@ void
 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_l, r0);
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
+    jit_movr(r0, JIT_RET);
     jit_dec_synth();
 }
+#endif
 
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
@@ -886,6 +1256,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1023,6 +1394,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1070,6 +1444,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1391,6 +1769,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1401,17 +1780,26 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s26_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
+               jit_check_frame();
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
@@ -1419,7 +1807,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s26_p(word))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -1430,11 +1823,14 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
@@ -1450,10 +1846,22 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
@@ -1474,11 +1882,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1486,16 +1906,34 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_retval_s:             case jit_code_retval_us:
            case jit_code_retval_i:
+#if __WORDSIZE == 64
            case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
            case jit_code_retval_f:             case jit_code_retval_d:
            case jit_code_prepare:
            case jit_code_finishr:              case jit_code_finishi:
@@ -1571,6 +2009,23 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void    /* Recompute _jitc->framesize from the current function's regset. */
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    _jitc->framesize = 16;     /* ra+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)        /* one jit_word_t per iregs[] entry set in regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    for (reg = 0; reg < jit_size(fregs); reg++)        /* one jit_float64_t per fregs[] entry set in regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;  /* round up to the next multiple of 16 */
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index 40f3126..91d15c8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -320,6 +320,12 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEGQ(r1,r0)
 #  define comr(r0,r1)                  NOT(r1,r0)
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 CTLZ(r1, r0)
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 CTTZ(r1, r0)
 #  define addr(r0,r1,r2)               ADDQ(r1,r2,r0)
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -637,7 +643,7 @@ static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define jmpr(r0)                     JMP(_R31_REGNO,r0,0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 #define callr(r0)                      _callr(_jit,r0)
@@ -825,7 +831,7 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     }
     again = _jit->pc.w;                        /* AGAIN */
     LDQ_L(r0, r1, 0);                  /* Load r0 locked */
-    jump0 = bner(0, r0, r2);           /* bne FAIL r0 r2 */
+    jump0 = bner(_jit->pc.w, r0, r2);  /* bne FAIL r0 r2 */
     movr(r0, r3);                      /* Move to r0 to attempt to store */
     STQ_C(r0, r1, 0);                  /* r0 is an in/out argument */
     jump1 = _jit->pc.w;
@@ -840,6 +846,20 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
         jit_unget_reg(r1_reg);
 }
 
+static void    /* Count leading ones of r1 into r0 (no direct instruction; built from NOT + CTLZ). */
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);      /* r0 = ~r1, so leading ones of r1 become leading zeros */
+    clzr(r0, r0);      /* r0 = leading-zero count of ~r1 */
+}
+
+static void    /* Count trailing ones of r1 into r0 (no direct instruction; built from NOT + CTTZ). */
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);      /* r0 = ~r1, so trailing ones of r1 become trailing zeros */
+    ctzr(r0, r0);      /* r0 = trailing-zero count of ~r1 */
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2543,7 +2563,7 @@ _bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(t0);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         w;
@@ -2553,7 +2573,8 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
     if (_s21_p(d))
        BR(_R31_REGNO, d);
     else
-       (void)jmpi_p(i0);
+       w = jmpi_p(i0);
+    return (w);
 }
 
 static jit_word_t
index 5452a1e..83736b7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index ac314f2..14949b2 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 168
+#define JIT_INSTR_MAX 88
     0, /* data */
     0, /* live */
-    4, /* align */
+    12,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     20,        /* va_start */
     24,        /* va_arg */
     44,        /* va_arg_d */
     32,        /* movi */
     4, /* movnr */
     4, /* movzr */
+    32,        /* casr */
+    60,        /* casi */
     8, /* extr_c */
     8, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     8, /* extr_i */
     8, /* extr_ui */
+    16,        /* bswapr_us */
+    36,        /* bswapr_ui */
+    36,        /* bswapr_ul */
     16,        /* htonr_us */
     36,        /* htonr_ui */
     36,        /* htonr_ul */
     16,        /* bxsubr_u */
     16,        /* bxsubi_u */
     4, /* jmpr */
-    36,        /* jmpi */
+    4, /* jmpi */
     8, /* callr */
     36,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    16,        /* bswapr_us */
-    36,        /* bswapr_ui */
-    36,        /* bswapr_ul */
-    32,        /* casr */
-    60,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    8, /* cto */
+    4, /* ctz */
 #endif /* __WORDSIZE */
index 678d5c6..25566f4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -246,20 +246,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -319,7 +317,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -361,18 +359,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function != NULL);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += 8;
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -417,7 +419,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
@@ -429,7 +431,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
@@ -441,7 +443,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
@@ -453,7 +455,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
@@ -465,7 +467,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _A0 - v->u.w);
@@ -477,7 +479,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
@@ -489,7 +491,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
@@ -499,10 +501,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
     else
@@ -511,11 +513,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
@@ -610,10 +612,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function != NULL);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_A0 - _jitc->function->call.argi, u);
@@ -627,11 +629,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_int64_t u)
+_jit_pushargi(jit_state_t *_jit, jit_int64_t u, jit_code_t code)
 {
     jit_int32_t                regno;
     assert(_jitc->function != NULL);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_A0 - _jitc->function->call.argi, u);
@@ -863,6 +865,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -983,6 +986,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1124,6 +1130,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1361,7 +1371,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (_s21_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -1390,6 +1405,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1410,6 +1426,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1434,11 +1460,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:                case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1446,10 +1479,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index 12f9a2f..a0852a2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -37,8 +37,7 @@
 #  define jit_armv5e_p()               (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
 #  define jit_armv6_p()                        (jit_cpu.version >= 6)
 #  define jit_armv7_p()                        (jit_cpu.version >= 7)
-#  define jit_armv7r_p()               0
-#  define stack_framesize              48
+#  define jit_armv7r_p()               (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend))
 extern int     __aeabi_idivmod(int, int);
 extern unsigned        __aeabi_uidivmod(unsigned, unsigned);
 #  define _R0_REGNO                    0x00
@@ -141,8 +140,12 @@ extern unsigned    __aeabi_uidivmod(unsigned, unsigned);
 #  define THUMB2_UMULL                 0xfba00000
 #  define ARM_SMULL                    0x00c00090
 #  define THUMB2_SMULL                 0xfb800000
+/* >> ARMv7r */
+#  define ARM_SDIV                     0x07100010
+#  define ARM_UDIV                     0x07300010
 #  define THUMB2_SDIV                  0xfb90f0f0
 #  define THUMB2_UDIV                  0xfbb0f0f0
+/* << ARMv7r */
 #  define ARM_AND                      0x00000000
 #  define THUMB_AND                        0x4000
 #  define THUMB2_AND                   0xea000000
@@ -185,6 +188,12 @@ extern unsigned    __aeabi_uidivmod(unsigned, unsigned);
 #  define ARM_STREX                    0x01800090
 #  define THUMB2_STREX                 0xe8400000
 /* << ARMv6* */
+/* >> ARMv6t2 */
+#  define THUMB2_CLZ                   0xfab0f080
+#  define THUMB2_RBIT                  0xfa90f0a0
+#  define ARM_RBIT                     0x06f00030
+/* << ARMv6t2 */
+#  define ARM_CLZ                      0x01600010
 /* >> ARMv7 */
 #  define ARM_DMB                      0xf57ff050
 #  define THUMB2_DMB                   0xf3bf8f50
@@ -447,6 +456,12 @@ static void _tdmb(jit_state_t *_jit, int im);
 #  define NOT(rd,rm)                   CC_NOT(ARM_CC_AL,rd,rm)
 #  define T1_NOT(rd,rm)                        T1_MVN(rd,rm)
 #  define T2_NOT(rd,rm)                        T2_MVN(rd,rm)
+#  define T2_CLZ(rd,rm)                        torrr(THUMB2_CLZ,rm,rd,rm)
+#  define CC_CLZ(cc,rd,rm)             corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm)
+#  define CLZ(rd,rm)                   CC_CLZ(ARM_CC_AL,rd,rm)
+#  define T2_RBIT(rd,rm)               torrr(THUMB2_RBIT,rm,rd,rm)
+#  define CC_RBIT(cc,rd,rm)            corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm)
+#  define RBIT(rd,rm)                  CC_RBIT(ARM_CC_AL,rd,rm)
 #  define NOP()                                MOV(_R0_REGNO, _R0_REGNO)
 #  define T1_NOP()                     is(0xbf00)
 #  define CC_ADD(cc,rd,rn,rm)          corrr(cc,ARM_ADD,rn,rd,rm)
@@ -524,6 +539,10 @@ static void _tdmb(jit_state_t *_jit, int im);
 #  define CC_UMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
 #  define UMULL(rl,rh,rn,rm)           CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
 #  define T2_UMULL(rl,rh,rn,rm)                torrrr(THUMB2_UMULL,rn,rl,rh,rm)
+#  define CC_SDIV(cc,rd,rn,rm)         corrrr(cc,ARM_SDIV,rd,15,rn,rm)
+#  define SDIV(rd,rn,rm)               CC_SDIV(ARM_CC_AL,rd,rm,rn)
+#  define CC_UDIV(cc,rd,rn,rm)         corrrr(cc,ARM_UDIV,rd,15,rn,rm)
+#  define UDIV(rd,rn,rm)               CC_UDIV(ARM_CC_AL,rd,rm,rn)
 #  define T2_SDIV(rd,rn,rm)            torrr(THUMB2_SDIV,rn,rd,rm)
 #  define T2_UDIV(rd,rn,rm)            torrr(THUMB2_UDIV,rn,rd,rm)
 #  define CC_AND(cc,rd,rn,rm)          corrr(cc,ARM_AND,rn,rd,rm)
@@ -852,6 +871,8 @@ static void _tdmb(jit_state_t *_jit, int im);
 #  define T2_POP(im)                   tpp(THUMB2_POP,im)
 #  define jit_get_reg_args()                                           \
     do {                                                               \
+       CHECK_REG_ARGS();                                               \
+       jit_check_frame();                                              \
        (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
@@ -885,6 +906,14 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  _negr(_jit,r0,r1)
 static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2)
 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
@@ -1147,10 +1176,10 @@ static void _prolog(jit_state_t*,jit_node_t*);
 static void _epilog(jit_state_t*,jit_node_t*);
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
-#  define calli_p(i0)                  _calli_p(_jit,i0)
-static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define calli(i0,i1)                 _calli(_jit,i0,i1)
+static void _calli(jit_state_t*,jit_word_t,jit_bool_t);
+#  define calli_p(i0,i1)               _calli_p(_jit,i0,i1)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t);
 #  define vastart(r0)                  _vastart(_jit, r0)
 static void _vastart(jit_state_t*, jit_int32_t);
 #  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
@@ -1526,7 +1555,7 @@ _tpp(jit_state_t *_jit, int o, int im)
     assert(!(o & 0x0000ffff));
     if (o == THUMB2_PUSH)
        assert(!(im & 0x8000));
-    assert(__builtin_popcount(im & 0x1fff) > 1);
+    assert(__builtin_popcount(im & 0x7fff) > 1);
     thumb.i = o|im;
     iss(thumb.s[0], thumb.s[1]);
 }
@@ -1737,6 +1766,53 @@ _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
        RSBI(r0, r1, 0);
 }
 
+static void    /* Count leading zeros of r1 into r0, using CLZ when the CPU/mode provides it. */
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p() && jit_armv5e_p())      /* ARM-mode CLZ encoding */
+       CLZ(r0, r1);
+    else if (jit_thumb_p() && jit_armv7_p()) { /* armv6t2 actually */
+       T2_CLZ(r0, r1);
+    }
+    else
+       fallback_clz(r0, r1);   /* source is r1, not r0: r0 is only the destination */
+}
+
+static void    /* Count leading ones of r1 into r0 by complementing and counting leading zeros. */
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);      /* r0 = ~r1, so leading ones of r1 become leading zeros */
+    clzr(r0, r0);      /* r0 = leading-zero count of ~r1 */
+}
+
+static void    /* Count trailing ones of r1 into r0: RBIT then count leading ones, else generic fallback. */
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_armv7_p()) {       /* armv6t2 actually */
+       if (jit_thumb_p())
+           T2_RBIT(r0, r1);    /* Thumb-2 encoding of RBIT */
+       else
+           RBIT(r0, r1);       /* ARM encoding of RBIT */
+       clor(r0, r0);   /* trailing ones of r1 are leading ones of the bit-reversed value */
+    }
+    else
+       fallback_cto(r0, r1);   /* RBIT not assumed available on this CPU */
+}
+
+static void    /* Count trailing zeros of r1 into r0: RBIT then count leading zeros, else generic fallback. */
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_armv7_p()) {       /* armv6t2 actually */
+       if (jit_thumb_p())
+           T2_RBIT(r0, r1);    /* Thumb-2 encoding of RBIT */
+       else
+           RBIT(r0, r1);       /* ARM encoding of RBIT */
+       clzr(r0, r0);   /* trailing zeros of r1 are leading zeros of the bit-reversed value */
+    }
+    else
+       fallback_ctz(r0, r1);   /* RBIT not assumed available on this CPU */
+}
+
 static void
 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2232,8 +2308,12 @@ _divrem(jit_state_t *_jit, int div, int sign,
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    if (jit_armv7r_p() && jit_thumb_p())
-       T2_SDIV(r0, r1, r2);
+    if (jit_armv7r_p()) {      /* divide instruction is now emitted in both encodings, not only Thumb */
+       if (jit_thumb_p())
+           T2_SDIV(r0, r1, r2);
+       else
+           SDIV(r0, r1, r2);   /* ARM-mode encoding of the signed divide */
+    }
     else
        divrem(1, 1, r0, r1, r2);
 }
@@ -2251,8 +2331,12 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    if (jit_armv7r_p() && jit_thumb_p())
-       T2_UDIV(r0, r1, r2);
+    if (jit_armv7r_p()) {      /* divide instruction is now emitted in both encodings, not only Thumb */
+       if (jit_thumb_p())
+           T2_UDIV(r0, r1, r2);
+       else
+           UDIV(r0, r1, r2);   /* ARM-mode encoding of the unsigned divide */
+    }
     else
        divrem(1, 0, r0, r1, r2);
 }
@@ -2312,7 +2396,23 @@ _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divrem(0, 1, r0, r1, r2);
+    if (jit_armv7r_p()) {      /* with a divide instruction: r0 = r1 - (r1 / r2) * r2 */
+       jit_int32_t             reg;
+       if (r0 == r1 || r0 == r2) {     /* r0 aliases an operand: keep r1 and r2 intact */
+           reg = jit_get_reg(jit_class_gpr);
+           divr(rn(reg), r1, r2);      /* quotient in the scratch register */
+           mulr(rn(reg), r2, rn(reg)); /* quotient * divisor */
+           subr(r0, r1, rn(reg));      /* dividend minus that product */
+           jit_unget_reg(reg);
+       }
+       else {                  /* r0 is distinct from both operands, use it as the temporary */
+           divr(r0, r1, r2);
+           mulr(r0, r2, r0);
+           subr(r0, r1, r0);
+       }
+    }
+    else
+       divrem(0, 1, r0, r1, r2);       /* otherwise keep the previous divrem() path */
 }
 
 static void
@@ -2328,7 +2428,23 @@ _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divrem(0, 0, r0, r1, r2);
+    if (jit_armv7r_p()) {      /* with a divide instruction: r0 = r1 - (r1 / r2) * r2, unsigned */
+       jit_int32_t             reg;
+       if (r0 == r1 || r0 == r2) {     /* r0 aliases an operand: keep r1 and r2 intact */
+           reg = jit_get_reg(jit_class_gpr);
+           divr_u(rn(reg), r1, r2);    /* unsigned quotient in the scratch register */
+           mulr(rn(reg), r2, rn(reg)); /* quotient * divisor */
+           subr(r0, r1, rn(reg));      /* dividend minus that product */
+           jit_unget_reg(reg);
+       }
+       else {                  /* r0 is distinct from both operands, use it as the temporary */
+           divr_u(r0, r1, r2);
+           mulr(r0, r2, r0);
+           subr(r0, r1, r0);
+       }
+    }
+    else
+       divrem(0, 0, r0, r1, r2);       /* otherwise keep the previous divrem() path */
 }
 
 static void
@@ -2741,8 +2857,8 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
     jit_word_t         w;
     jit_word_t         d;
     jit_int32_t                reg;
+    /* i1 means jump is reachable in signed 24 bits  */
     if (i1) {
-       /* Assume jump is not longer than 23 bits if inside jit */
        w = _jit->pc.w;
        /* if thumb and in thumb mode */
        if (jit_thumb_p() && _jitc->thumb) {
@@ -3835,14 +3951,29 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
 }
 
 static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p)
 {
     jit_word_t         d;
     jit_int32_t                reg;
-    d = ((i0 - _jit->pc.w) >> 2) - 2;
-    if (!jit_exchange_p() && !jit_thumb_p() && _s24P(d))
-       BLI(d & 0x00ffffff);
+    if (!exchange_p) {
+       if (jit_thumb_p()) {
+           if (jit_exchange_p())
+               /* skip switch from  arm to thumb 
+                * exchange_p set to zero means a jit function
+                * call in the same jit code buffer */
+               d = ((i0 + 8 - _jit->pc.w) >> 1) - 2;
+           else
+               d = ((i0 - _jit->pc.w) >> 1) - 2;
+       }
+       else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
+       if (_s24P(d)) {
+           if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
+           else                BLI(d & 0x00ffffff);
+       }
+       else                    goto fallback;
+    }
     else {
+    fallback:
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        if (jit_thumb_p())
@@ -3854,28 +3985,44 @@ _calli(jit_state_t *_jit, jit_word_t i0)
 }
 
 static jit_word_t
-_calli_p(jit_state_t *_jit, jit_word_t i0)
+_calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
 {
     jit_word_t         w;
+    jit_word_t         d;
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    w = _jit->pc.w;
-    movi_p(rn(reg), i0);
-    if (jit_thumb_p())
-       T1_BLX(rn(reg));
-    else
-       BLX(rn(reg));
-    jit_unget_reg(reg);
+    /* i1 means call is reachable in signed 24 bits  */
+    if (i1) {
+       w = _jit->pc.w;         /* address of the branch-and-link emitted below; returned to the caller */
+       if (jit_thumb_p())      d = ((i0 - _jit->pc.w) >> 1) - 2;      /* displacement in 2-byte units */
+       else                    d = ((i0 - _jit->pc.w) >> 2) - 2;      /* displacement in 4-byte units */
+       assert(_s24P(d));       /* caller promised the target is in range */
+       if (jit_thumb_p())      T2_BLI(encode_thumb_jump(d));
+       else                    BLI(d & 0x00ffffff);
+    }
+    else {                     /* far call: load the target into a scratch register and BLX through it */
+       reg = jit_get_reg(jit_class_gpr);
+       w = _jit->pc.w;         /* address where the movi_p sequence starts; returned to the caller */
+       movi_p(rn(reg), i0);
+       if (jit_thumb_p())
+           T1_BLX(rn(reg));
+       else
+           BLX(rn(reg));
+       jit_unget_reg(reg);
+    }
     return (w);
 }
 
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, mask, count;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
+       if (jit_swf_p())
+           CHECK_SWF_OFFSET();
+       CHECK_REG_ARGS();
        if (_jitc->function->assume_frame) {
            if (jit_thumb_p() && !_jitc->thumb)
                _jitc->thumb = _jit->pc.w;
@@ -3888,38 +4035,66 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
+    /* If this jit_check_frame() succeeds, it actually is just a need_stack,
+     * usually for arguments, so, allocai was not called, but pusharg*
+     * was called increasing stack size, for negative access offsets.
+     * This can be optimized for one less prolog instruction, that is,
+     * do not create the frame pointer, and only add _jitc->function->stack
+     * to sp, and on epilog, instead of moving fp to sp, just add negative
+     * value of _jitc->function->stack. Since this condition requires a
+     * large function body for excess arguments to called function, keep
+     * things a bit simpler for now, as this is the only place need_stack
+     * would be useful. */
+    if (_jitc->function->stack)
+       jit_check_frame();
+
+    for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           mask |= 1 << rn(iregs[reg]);
+           ++count;
+       }
+    }
+    /* One extra register to keep stack 8 bytes aligned */
+    if (count & 1) {
+       for (reg = 4; reg < 10; reg++) {
+           if (!(mask & (1 << reg))) {
+               mask |= 1 << reg;
+               break;
+           }
+       }
+    }
+    if (_jitc->function->need_frame || _jitc->function->need_return)
+       mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+    if (!jit_swf_p() && _jitc->function->save_reg_args &&
+       !(_jitc->function->self.call & jit_call_varargs))
+       mask |= 0xf;
 
     if (jit_thumb_p()) {
        /*  switch to thumb mode (better approach would be to
         * ORR 1 address being called, but no clear distinction
         * of what is a pointer to a jit function, or if patching
         * a pointer to a jit function) */
-       ADDI(_R12_REGNO, _R15_REGNO, 1);
-       BX(_R12_REGNO);
+       if (jit_exchange_p()) {
+           ADDI(_R12_REGNO, _R15_REGNO, 1);
+           BX(_R12_REGNO);
+       }
        if (!_jitc->thumb)
            _jitc->thumb = _jit->pc.w;
-       if (jit_cpu.abi) {
-           T2_PUSH(0xf);
-           T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-           VPUSH_F64(_D8_REGNO, 8);
-       }
-       else {
+       if (jit_swf_p() || (_jitc->function->save_reg_args &&
+                           (_jitc->function->self.call & jit_call_varargs)))
            T2_PUSH(0xf);
-           T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-       }
+       if (mask)
+           T2_PUSH(mask);
     }
     else {
-       if (jit_cpu.abi) {
-           PUSH(0xf);
-           PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-           VPUSH_F64(_D8_REGNO, 8);
-       }
-       else {
+       if (jit_swf_p() || (_jitc->function->save_reg_args &&
+                           (_jitc->function->self.call & jit_call_varargs)))
            PUSH(0xf);
-           PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-       }
+       if (mask)
+           PUSH(mask);
     }
-    movr(_FP_REGNO, _SP_REGNO);
+    if (_jitc->function->need_frame)
+       movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
@@ -3933,17 +4108,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, mask, count;
     if (_jitc->function->assume_frame)
        return;
 
-    movr(_SP_REGNO, _FP_REGNO);
-    if (jit_cpu.abi)
-       VPOP_F64(_D8_REGNO, 8);
-    if (jit_thumb_p())
-       T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-    else
-       POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-    addi(_SP_REGNO, _SP_REGNO, 16);
+    for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           mask |= 1 << rn(iregs[reg]);
+           ++count;
+       }
+    }
+    /* One extra register to keep stack 8 bytes aligned */
+    if (count & 1) {
+       for (reg = 4; reg < 10; reg++) {
+           if (!(mask & (1 << reg))) {
+               mask |= 1 << reg;
+               break;
+           }
+       }
+    }
+    if (_jitc->function->need_frame || _jitc->function->need_return)
+       mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+    if (_jitc->function->need_frame)
+       movr(_SP_REGNO, _FP_REGNO);
+    if (!jit_swf_p() && _jitc->function->save_reg_args &&
+       !(_jitc->function->self.call & jit_call_varargs))
+       addi(_SP_REGNO, _SP_REGNO, 16);
+    if (mask) {
+       if (jit_thumb_p())
+           T2_POP(mask);
+       else
+           POP(mask);
+    }
+    if (jit_swf_p() || (_jitc->function->save_reg_args &&
+                       (_jitc->function->self.call & jit_call_varargs)))
+       addi(_SP_REGNO, _SP_REGNO, 16);
     if (jit_thumb_p())
        T1_BX(_LR_REGNO);
     else
@@ -3961,8 +4160,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
      * The -16 is to account for the 4 argument registers
      * always saved, and _jitc->function->vagp is to account
      * for declared arguments. */
-    addi(r0, _FP_REGNO, _jitc->function->self.size -
-        16 + _jitc->function->vagp);
+    addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp);
 }
 
 static void
@@ -3989,7 +4187,28 @@ _patch_at(jit_state_t *_jit,
        jit_word_t       w;
     } u;
     u.w = instr;
-    if (kind == arm_patch_jump) {
+    if (kind == arm_patch_call) {
+       if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
+           code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           assert((thumb.i & THUMB2_BLI) == THUMB2_BLI);
+           /* skip code to switch from arm to thumb mode */
+           if (jit_exchange_p())
+               d = ((label + 8 - instr) >> 1) - 2;
+           else
+               d = ((label - instr) >> 1) - 2;
+           assert(_s24P(d));
+           thumb.i = THUMB2_BLI | encode_thumb_jump(d);
+           thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+       }
+       else {
+           thumb.i = u.i[0];
+           assert((thumb.i & 0x0f000000) == ARM_BLI);
+           d = ((label - instr) >> 2) - 2;
+           assert(_s24P(d));
+           u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
+       }
+    }
+    else if (kind == arm_patch_jump) {
        if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
            code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
            if ((thumb.i & THUMB2_B) == THUMB2_B) {
index c88f9e3..2aa6a12 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -564,6 +564,8 @@ _swf_ff(jit_state_t *_jit, float(*i0)(float),
        jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -581,6 +583,8 @@ _swf_dd(jit_state_t *_jit, double (*i0)(double),
        jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -614,6 +618,8 @@ _swf_fff(jit_state_t *_jit, float (*i0)(float, float),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -635,6 +641,8 @@ _swf_ddd(jit_state_t *_jit, double (*i0)(double, double),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -684,6 +692,8 @@ _swf_fff_(jit_state_t *_jit, float (*i0)(float, float),
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -706,6 +716,8 @@ _swf_rsbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i0;
     movi(_R0_REGNO, data.i);
     if (jit_fpr_p(r1))
@@ -729,7 +741,8 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double),
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
-
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -769,6 +782,8 @@ _swf_rsbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i0;
     movi(_R0_REGNO, data.i[0]);
     movi(_R1_REGNO, data.i[1]);
@@ -805,6 +820,8 @@ _swf_iff(jit_state_t *_jit, int (*i0)(float, float),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -823,6 +840,8 @@ _swf_idd(jit_state_t *_jit, int (*i0)(double, double),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -861,6 +880,8 @@ _swf_iff_(jit_state_t *_jit, int (*i0)(float, float),
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -881,6 +902,8 @@ _swf_idd_(jit_state_t *_jit, int (*i0)(double, double),
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -907,6 +930,8 @@ _swf_iunff(jit_state_t *_jit, int (*i0)(float, float),
 {
     jit_word_t         instr;
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -952,6 +977,8 @@ _swf_iundd(jit_state_t *_jit, int (*i0)(double, double),
 {
     jit_word_t         instr;
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1033,6 +1060,8 @@ _swf_iunff_(jit_state_t *_jit, int (*i0)(float, float),
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1077,6 +1106,8 @@ _swf_iundd_(jit_state_t *_jit, int (*i0)(double, double),
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -1135,6 +1166,8 @@ _swf_bff(jit_state_t *_jit, int (*i0)(float, float), int cc,
 {
     jit_word_t         w, d;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
@@ -1168,6 +1201,8 @@ _swf_bdd(jit_state_t *_jit, int (*i0)(double, double), int cc,
 {
     jit_word_t         w, d;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
@@ -1221,6 +1256,8 @@ _swf_bff_(jit_state_t *_jit, int (*i0)(float, float), int cc,
     } data;
     jit_word_t         w, d;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.f = i2;
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
@@ -1256,6 +1293,8 @@ _swf_bdd_(jit_state_t *_jit, int (*i0)(double, double), int cc,
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.d = i2;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -1296,6 +1335,8 @@ _swf_bunff(jit_state_t *_jit, int eq,
 {
     jit_word_t         w, d, j0, j1;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
@@ -1366,6 +1407,8 @@ _swf_bundd(jit_state_t *_jit, int eq,
 {
     jit_word_t         w, d, j0, j1;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
@@ -1473,6 +1516,8 @@ _swf_bunff_(jit_state_t *_jit, int eq,
     jit_word_t         w, d, j0, j1;
     data.f = i1;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
@@ -1541,6 +1586,8 @@ _swf_bundd_(jit_state_t *_jit, int eq,
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -1622,6 +1669,8 @@ static void
 _swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     movr(_R0_REGNO, r1);
     swf_call(__aeabi_i2f, i2f, _R1_REGNO);
     if (jit_fpr_p(r0))
@@ -1635,6 +1684,8 @@ static void
 _swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     movr(_R0_REGNO, r1);
     swf_call(__aeabi_i2d, i2d, _R2_REGNO);
     if (jit_fpr_p(r0)) {
@@ -1656,6 +1707,8 @@ static void
 _swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1680,6 +1733,8 @@ static void
 _swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -1709,6 +1764,8 @@ _swf_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_word_t         slow_not_nan;
 #endif
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -1763,6 +1820,8 @@ _swf_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_word_t         slow_not_nan;
 #endif
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1823,6 +1882,8 @@ _swf_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (r0 != r1) {
+       if (jit_fpr_p(r0) || jit_fpr_p(r1))
+           CHECK_SWF_OFFSET();
        if (jit_fpr_p(r1)) {
            reg = jit_get_reg(jit_class_gpr);
            swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -1844,6 +1905,8 @@ _swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (r0 != r1) {
+       if (jit_fpr_p(r0) || jit_fpr_p(r1))
+           CHECK_SWF_OFFSET();
        if (jit_fpr_p(r1)) {
            if (!jit_thumb_p() && jit_armv5e_p() &&
                (reg = jit_get_reg_pair()) != JIT_NOREG) {
@@ -1894,6 +1957,8 @@ _swf_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
        jit_float32_t   f;
     } data;
     jit_int32_t                reg;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.f = i0;
     if (jit_fpr_p(r0)) {
        reg = jit_get_reg(jit_class_gpr);
@@ -1913,6 +1978,8 @@ _swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
        jit_int32_t     i[2];
        jit_float64_t   d;
     } data;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.d = i0;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p() &&
@@ -1941,6 +2008,8 @@ static void
 _swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -1966,6 +2035,8 @@ static void
 _swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
            r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
@@ -2013,6 +2084,8 @@ static void
 _swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -2038,6 +2111,8 @@ static void
 _swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
            r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
@@ -2170,6 +2245,7 @@ _swf_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        ldxi_i(rn(reg), r1, 0);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
@@ -2184,6 +2260,7 @@ _swf_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (reg = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDI(rn(reg), r1, 0);
@@ -2212,6 +2289,7 @@ _swf_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        ldi_i(rn(reg), i0);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
@@ -2225,6 +2303,8 @@ static void
 _swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
        (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
        movi(rn(rg0), i0);
@@ -2258,6 +2338,7 @@ _swf_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        ldxr_i(rn(reg), r1, r2);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
@@ -2272,6 +2353,7 @@ _swf_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            LDRD(rn(rg0), r1, r2);
@@ -2307,6 +2389,8 @@ static void
 _swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0)) {
        reg = jit_get_reg(jit_class_gpr);
        ldxi_i(rn(reg), r1, i0);
@@ -2322,6 +2406,7 @@ _swf_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
@@ -2391,6 +2476,7 @@ _swf_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        stxi_i(0, r0, rn(reg));
@@ -2405,6 +2491,7 @@ _swf_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (reg = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -2435,6 +2522,7 @@ _swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
        sti_i(i0, rn(reg));
@@ -2449,6 +2537,7 @@ _swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            rg1 = jit_get_reg(jit_class_gpr);
@@ -2488,6 +2577,7 @@ _swf_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r2)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r2) + 8);
        stxr_i(r1, r0, rn(reg));
@@ -2502,6 +2592,7 @@ _swf_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r2)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDIN(rn(rg0), _FP_REGNO, swf_off(r2) + 8);
@@ -2538,6 +2629,7 @@ _swf_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        stxi_i(i0, r0, rn(reg));
@@ -2552,6 +2644,7 @@ _swf_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
index 14f085a..faba5a8 100644 (file)
@@ -1,12 +1,13 @@
 
 #if __WORDSIZE == 32
 #if defined(__ARM_PCS_VFP)
-#define JIT_INSTR_MAX 48
+#define JIT_INSTR_MAX 50
     0, /* data */
     0, /* live */
-    2, /* align */
+    14,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     2, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     16,        /* va_arg_d */
     8, /* movi */
     8, /* movnr */
     8, /* movzr */
+    42,        /* casr */
+    50,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    8, /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
     8, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     8, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    16,        /* movi_d */
+    32,        /* movi_d */
     4, /* ldr_d */
     12,        /* ldi_d */
     8, /* ldxr_d */
     12,        /* bler_d */
     28,        /* blei_d */
     12,        /* beqr_d */
-    28,        /* beqi_d */
+    36,        /* beqi_d */
     12,        /* bger_d */
     28,        /* bgei_d */
     12,        /* bgtr_d */
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    4, /* bswapr_ui */
-    0, /* bswapr_ul */
-    40,        /* casr */
-    48,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    12,        /* cto */
+    8, /* ctz */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 32
 #if !defined(__ARM_PCS_VFP)
-#define JIT_INSTR_MAX 160
+#define JIT_INSTR_MAX 50
     0, /* data */
     0, /* live */
-    2, /* align */
+    18,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     2, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     28,        /* va_arg_d */
     8, /* movi */
     8, /* movnr */
     8, /* movzr */
+    42,        /* casr */
+    46,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     20,        /* htonr_us */
     16,        /* htonr_ui */
     0, /* htonr_ul */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
-    160,       /* epilog */
+    30,        /* epilog */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     28,        /* extr_f */
     22,        /* extr_d_f */
     8, /* movr_f */
-    12,        /* movi_f */
+    16,        /* movi_f */
     8, /* ldr_f */
     16,        /* ldi_f */
     8, /* ldxr_f */
     28,        /* bler_f */
     32,        /* blei_f */
     28,        /* beqr_f */
-    40,        /* beqi_f */
+    48,        /* beqi_f */
     28,        /* bger_f */
     32,        /* bgei_f */
     28,        /* bgtr_f */
     72,        /* unordi_d */
     20,        /* truncr_d_i */
     0, /* truncr_d_l */
-    28,        /* extr_d */
+    36,        /* extr_d */
     22,        /* extr_f_d */
     16,        /* movr_d */
-    20,        /* movi_d */
+    32,        /* movi_d */
     16,        /* ldr_d */
     24,        /* ldi_d */
     20,        /* ldxr_d */
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    16,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    40,        /* casr */
-    44,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    12,        /* cto */
+    8, /* ctz */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
index 4b146d2..20f80a2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -1255,7 +1255,7 @@ _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
     if (jit_fpr_p(r0)) {
        /* float arguments are packed, for others,
         * lightning only address even registers */
-       if (!(r0 & 1) && (r0 - 16) >= 0 &&
+       if (!(r0 & 1) && (r0 - 32) >= 0 &&
            ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
             (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
            VIMM(code, r0);
index 6b121bf..64a70f9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -21,6 +21,8 @@
 #  include <stdio.h>
 #endif
 
+#define stack_framesize                        48
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 4)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 16)
 #define jit_arg_d_reg_p(i)             ((i) >= 0 && (i) < 15)
 #define arm_patch_node                 0x80000000
 #define arm_patch_word                 0x40000000
 #define arm_patch_jump                 0x20000000
-#define arm_patch_load                 0x00000000
+#define arm_patch_load                 0x10000000
+#define arm_patch_call                 0x08000000
 
 #define jit_fpr_p(rn)                  ((rn) > 15)
 
-#define arg_base()                                                     \
-    (stack_framesize - 16 + (jit_cpu.abi ? 64 : 0))
+#define arg_base()                     (stack_framesize - 16)
 #define arg_offset(n)                                                  \
     ((n) < 4 ? arg_base() + ((n) << 2) : (n))
 
  * arm mode, what may cause a crash upon return of that function
  * if generating jit for a relative jump.
  */
-#define jit_exchange_p()               1
+#define jit_exchange_p()               jit_cpu.exchange
 
 /* FIXME is it really required to not touch _R10? */
 
+#define CHECK_REG_ARGS()                                               \
+    do {                                                               \
+       if (!_jitc->function->save_reg_args)                            \
+           _jitc->again = _jitc->function->save_reg_args = 1;          \
+    } while (0)
+
+#define CHECK_SWF_OFFSET()                                             \
+    do {                                                               \
+       if (!_jitc->function->swf_offset) {                             \
+           _jitc->again = _jitc->function->save_reg_args =             \
+               _jitc->function->swf_offset = 1;                        \
+           _jitc->function->self.aoff = -64;                           \
+       }                                                               \
+    } while (0)
+
+#define CHECK_RETURN()                                                 \
+    do {                                                               \
+       if (!_jitc->function->need_frame &&                             \
+           !_jitc->function->need_return)                              \
+           _jitc->again = _jitc->function->need_return = 1;            \
+    } while (0)
+
 /*
  * Types
  */
@@ -59,8 +83,8 @@ typedef jit_pointer_t jit_va_list;
 /*
  * Prototypes
  */
-#define jit_make_arg(node)             _jit_make_arg(_jit,node)
-static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg(node,code)                _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
 #define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
 static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
 #define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
@@ -77,8 +101,10 @@ static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t);
 static void _flush_consts(jit_state_t*);
 #define invalidate_consts()            _invalidate_consts(_jit)
 static void _invalidate_consts(jit_state_t*);
-#define patch(instr, node)             _patch(_jit, instr, node)
-static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
+#define patch(instr, node, kind)       _patch(_jit, instr, node, kind)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*,jit_int32_t);
 
 #if defined(__GNUC__)
 /* libgcc */
@@ -149,6 +175,10 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _R4, _R5, _R6, _R7, _R8, _R9,
+};
+
 /*
  * Implementation
  */
@@ -202,6 +232,14 @@ jit_get_cpu(void)
     /* armv6t2 todo (software float and thumb2) */
     if (!jit_cpu.vfp && jit_cpu.thumb)
        jit_cpu.thumb = 0;
+    /* FIXME need test environments for the below. For the moment just
+     * be very conservative */
+    /* force generation of code that assumes the jit code and the library
+     * functions it calls do not use the same instruction set */
+    jit_cpu.exchange = 1;
+    /* do not generate hardware integer division by default */
+    if (jit_cpu.version == 7)
+       jit_cpu.extend = 0;
 }
 
 void
@@ -245,15 +283,10 @@ _jit_prolog(jit_state_t *_jit)
     }
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
-    if (jit_cpu.abi)
-       _jitc->function->self.size += 64;
     _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.alen = 0;
-    if (jit_swf_p())
-       /* 8 soft float registers */
-       _jitc->function->self.aoff = -64;
-    else
-       _jitc->function->self.aoff = 0;
+       _jitc->function->self.alen = _jitc->function->self.aoff = 0;
+    _jitc->function->swf_offset = _jitc->function->save_reg_args =
+       _jitc->function->need_return = 0;
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
@@ -279,6 +312,9 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    if (jit_swf_p())
+       CHECK_SWF_OFFSET();
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -327,20 +363,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -422,7 +456,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code != jit_code_arg) {
+    if (!(u->code >= jit_code_arg_c && u->code <= jit_code_arg)) {
        if (u->code == jit_code_arg_f) {
            if (jit_cpu.abi)
                return (jit_arg_f_reg_p(u->u.w));
@@ -437,7 +471,7 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 }
 
 static jit_node_t *
-_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
 {
     jit_int32_t                 offset;
     if (jit_arg_reg_p(_jitc->function->self.argi))
@@ -447,7 +481,7 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node)
        _jitc->function->self.size += sizeof(jit_word_t);
     }
     if (node == (jit_node_t *)0)
-       node = jit_new_node(jit_code_arg);
+       node = jit_new_node(code);
     else
        link_node(node);
     node->u.w = offset;
@@ -534,9 +568,10 @@ _jit_ellipsis(jit_state_t *_jit)
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
+       CHECK_REG_ARGS();
        if (jit_cpu.abi &&  _jitc->function->self.argf)
            rewind_prolog();
-       /* First 4 stack addresses are always spilled r0-r3 */
+       /* First 4 stack addresses need to hold the spilled r0-r3 */
        if (jit_arg_reg_p(_jitc->function->self.argi))
            _jitc->function->vagp = _jitc->function->self.argi * 4;
        else
@@ -559,16 +594,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     assert(_jitc->function);
-    return (jit_make_arg((jit_node_t*)0));
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+    return (jit_make_arg((jit_node_t*)0, code));
 }
 
 jit_node_t *
 _jit_arg_f(jit_state_t *_jit)
 {
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
     return (jit_make_arg_f((jit_node_t*)0));
 }
 
@@ -576,103 +616,141 @@ jit_node_t *
 _jit_arg_d(jit_state_t *_jit)
 {
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
     return (jit_make_arg_d((jit_node_t*)0));
 }
 
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_swf_p())
-       jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+       node = jit_ldxi_c(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_swf_p())
-       jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+       node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_swf_p())
-       jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+       node = jit_ldxi_s(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_swf_p())
-       jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+       node = jit_ldxi_us(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_swf_p())
-       jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+       node = jit_ldxi_i(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    jit_node_t         *node = NULL;
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_swf_p())
-       jit_stxi(arg_offset(v->u.w), JIT_FP, u);
+       node = jit_stxi(arg_offset(v->u.w), JIT_FP, u);
     else if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
-       jit_stxi(v->u.w, JIT_FP, u);
+       node = jit_stxi(v->u.w, JIT_FP, u);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
-    jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    jit_int32_t                 regno;
+    jit_node_t         *node = NULL;
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(arg_offset(v->u.w), JIT_FP, regno);
+       node = jit_stxi(arg_offset(v->u.w), JIT_FP, regno);
        jit_unget_reg(regno);
     }
     else if (jit_arg_reg_p(v->u.w))
@@ -680,30 +758,41 @@ _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+       node = jit_stxi(v->u.w, JIT_FP, regno);
        jit_unget_reg(regno);
     }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(getarg_f, u, v);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_f(u, JIT_FA0 - v->u.w);
        else
-           jit_ldxi_f(u, JIT_FP, v->u.w);
+           node = jit_ldxi_f(u, JIT_FP, v->u.w);
     }
     else if (jit_swf_p())
-       jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w));
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_w_f(u, JIT_RA0 - v->u.w);
        else
-           jit_ldxi_f(u, JIT_FP, v->u.w);
+           node = jit_ldxi_f(u, JIT_FP, v->u.w);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -711,21 +800,27 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
     if (jit_cpu.abi) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_f(JIT_FA0 - v->u.w, u);
        else
-           jit_stxi_f(v->u.w, JIT_FP, u);
+           node = jit_stxi_f(v->u.w, JIT_FP, u);
     }
     else if (jit_swf_p())
-       jit_stxi_f(arg_offset(v->u.w), JIT_FP, u);
+       node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, u);
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_f_w(JIT_RA0 - v->u.w, u);
        else
-           jit_stxi_f(v->u.w, JIT_FP, u);
+           node = jit_stxi_f(v->u.w, JIT_FP, u);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -733,7 +828,8 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
 {
-    jit_int32_t                regno;
+    jit_int32_t                 regno;
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_cpu.abi) {
@@ -742,14 +838,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
        else {
            regno = jit_get_reg(jit_class_fpr);
            jit_movi_f(regno, u);
-           jit_stxi_f(v->u.w, JIT_FP, regno);
+           node = jit_stxi_f(v->u.w, JIT_FP, regno);
            jit_unget_reg(regno);
        }
     }
     else if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno);
+       node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno);
        jit_unget_reg(regno);
     }
     else {
@@ -758,30 +854,41 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
        if (jit_arg_reg_p(v->u.w))
            jit_movr_f_w(JIT_RA0 - v->u.w, regno);
        else
-           jit_stxi_f(v->u.w, JIT_FP, regno);
+           node = jit_stxi_f(v->u.w, JIT_FP, regno);
        jit_unget_reg(regno);
     }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_d(u, JIT_FA0 - v->u.w);
        else
-           jit_ldxi_d(u, JIT_FP, v->u.w);
+           node = jit_ldxi_d(u, JIT_FP, v->u.w);
     }
     else if (jit_swf_p())
-       jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w));
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_ww_d(u, JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1));
        else
-           jit_ldxi_d(u, JIT_FP, v->u.w);
+           node = jit_ldxi_d(u, JIT_FP, v->u.w);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -789,21 +896,27 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
     if (jit_cpu.abi) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_d(JIT_FA0 - v->u.w, u);
        else
-           jit_stxi_d(v->u.w, JIT_FP, u);
+           node = jit_stxi_d(v->u.w, JIT_FP, u);
     }
     else if (jit_swf_p())
-       jit_stxi_d(arg_offset(v->u.w), JIT_FP, u);
+       node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, u);
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), u);
        else
-           jit_stxi_d(v->u.w, JIT_FP, u);
+           node = jit_stxi_d(v->u.w, JIT_FP, u);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -811,7 +924,8 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 {
-    jit_int32_t                regno;
+    jit_int32_t                 regno;
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_cpu.abi) {
@@ -820,14 +934,14 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        else {
            regno = jit_get_reg(jit_class_fpr);
            jit_movi_d(regno, u);
-           jit_stxi_d(v->u.w, JIT_FP, regno);
+           node = jit_stxi_d(v->u.w, JIT_FP, regno);
            jit_unget_reg(regno);
        }
     }
     else if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno);
+       node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno);
        jit_unget_reg(regno);
     }
     else {
@@ -836,17 +950,22 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        if (jit_arg_reg_p(v->u.w))
            jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), regno);
        else
-           jit_stxi_d(v->u.w, JIT_FP, regno);
+           node = jit_stxi_d(v->u.w, JIT_FP, regno);
        jit_unget_reg(regno);
     }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
@@ -860,11 +979,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -1148,6 +1267,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1293,7 +1413,7 @@ _emit_code(jit_state_t *_jit)
                else {                                                  \
                    word = name##r##type(_jit->pc.w,                    \
                                         rn(node->v.w), rn(node->w.w)); \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #define case_bvv(name, type)                                           \
@@ -1318,7 +1438,7 @@ _emit_code(jit_state_t *_jit)
                        word = vfp_##name##r##type(_jit->pc.w,          \
                                                   rn(node->v.w),       \
                                                   rn(node->w.w));      \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #define case_brw(name, type)                                           \
@@ -1332,7 +1452,7 @@ _emit_code(jit_state_t *_jit)
                else {                                                  \
                    word = name##i##type(_jit->pc.w,                    \
                                         rn(node->v.w), node->w.w);     \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break;
 #define case_bvf(name)                                                 \
@@ -1357,7 +1477,7 @@ _emit_code(jit_state_t *_jit)
                        word = vfp_##name##i_f(_jit->pc.w,              \
                                               rn(node->v.w),           \
                                               node->w.f);              \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #define case_bvd(name)                                                 \
@@ -1382,7 +1502,7 @@ _emit_code(jit_state_t *_jit)
                        word = vfp_##name##i_d(_jit->pc.w,              \
                                               rn(node->v.w),           \
                                               node->w.d);              \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #if DEVEL_DISASSEMBLER
@@ -1405,6 +1525,12 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               if (jit_thumb_p())
+                   nop((node->u.w + 1) & ~1);
+               else
+                   nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                if (must_align_p(node->next))
                    nop(2);
@@ -1456,6 +1582,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1526,7 +1656,7 @@ _emit_code(jit_state_t *_jit)
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
                        word = movi_p(rn(node->u.w), temp->u.w);
-                       patch(word, node);
+                       patch(word, node, arm_patch_word);
                    }
                }
                else
@@ -1765,6 +1895,7 @@ _emit_code(jit_state_t *_jit)
                case_bvv(bunord, _d);
                case_bvd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                flush_consts();
                break;
@@ -1776,36 +1907,59 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w, 1);
-                       patch(word, node);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_thumb_p())      word >>= 1;
+                       else                    word >>= 2;
+                       word -= 2;
+                       value = _s24P(word);
+                       word = jmpi_p(_jit->pc.w, value);
+                       patch(word, node, value ?
+                             arm_patch_jump : arm_patch_word);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
+               }
                flush_consts();
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                if (node->flag & jit_flag_node) {
+                   CHECK_RETURN();
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       calli(temp->u.w);
+                       calli(temp->u.w, 0);
                    else {
-                       word = calli_p(_jit->pc.w);
-                       patch(word, node);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_exchange_p())
+                           word -= 8;
+                       if (jit_thumb_p())      word >>= 1;
+                       else                    word >>= 2;
+                       word -= 2;
+                       value = _s24P(word);
+                       word = calli_p(_jit->pc.w, value);
+                       patch(word, node, value ?
+                             arm_patch_call : arm_patch_word);
                    }
                }
-               else
-                   calli(node->u.w);
+               else {
+                   jit_check_frame();
+                   calli(node->u.w, jit_exchange_p());
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1819,6 +1973,8 @@ _emit_code(jit_state_t *_jit)
 #endif
            restart_function:
                _jitc->again = 0;
+               compute_framesize();
+               patch_alist(0);
                prolog(node);
                break;
            case jit_code_epilog:
@@ -1833,6 +1989,21 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   undo.func.need_return = _jitc->function->need_return;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* swf_offset and save_reg_args must also not be undone */
+                   undo.func.swf_offset = _jitc->function->swf_offset;
+                   undo.func.save_reg_args = _jitc->function->save_reg_args;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1845,6 +2016,7 @@ _emit_code(jit_state_t *_jit)
                    if (_jitc->data_info.ptr)
                        _jitc->data_info.offset = undo.info_offset;
 #endif
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
@@ -1907,21 +2079,34 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case  jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1984,7 +2169,10 @@ _emit_code(jit_state_t *_jit)
        node = _jitc->patches.ptr[offset].node;
        word = _jitc->patches.ptr[offset].inst;
        if (!jit_thumb_p() &&
-           (node->code == jit_code_movi || node->code == jit_code_calli)) {
+           (node->code == jit_code_movi ||
+            (node->code == jit_code_calli &&
+             (_jitc->patches.ptr[offset].kind & ~arm_patch_node) ==
+             arm_patch_word))) {
            /* calculate where to patch word */
            value = *(jit_int32_t *)word;
            assert((value & 0x0f700000) == ARM_LDRI);
@@ -2254,24 +2442,31 @@ _invalidate_consts(jit_state_t *_jit)
 }
 
 static void
-_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+_compute_framesize(jit_state_t *_jit)  /* recomputes _jitc->framesize for _jitc->function */
+{
+    jit_int32_t                reg;
+    _jitc->framesize = sizeof(jit_word_t) * 2; /* lr+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)  /* one word per iregs[] entry set in the function's regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    if (_jitc->function->save_reg_args)  /* 16 more bytes when save_reg_args is set */
+       _jitc->framesize += 16;
+
+    /* Make sure called functions have an 8-byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 7) & -8;  /* round up to a multiple of 8 */
+}
+
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node, jit_int32_t kind)
 {
     jit_int32_t                 flag;
-    jit_int32_t                 kind;
 
     assert(node->flag & jit_flag_node);
-    if (node->code == jit_code_movi) {
+    if (node->code == jit_code_movi)
        flag = node->v.n->flag;
-       kind = arm_patch_word;
-    }
-    else {
+    else
        flag = node->u.n->flag;
-       if (node->code == jit_code_calli ||
-           (node->code == jit_code_jmpi && !(node->flag & jit_flag_node)))
-           kind = arm_patch_word;
-       else
-           kind = arm_patch_jump;
-    }
     assert(!(flag & jit_flag_patch));
     kind |= arm_patch_node;
     if (_jitc->patches.offset >= _jitc->patches.length) {
index 9ad84f1..a6981fa 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -54,7 +54,7 @@ static FILE                    *disasm_stream;
 #endif
 
 #if BINUTILS_2_38
-static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...)
+static int fprintf_styled(void * stream, enum disassembler_style style, const char* fmt, ...)
 {
   va_list args;
   int r;
@@ -256,7 +256,7 @@ disasm_print_address(bfd_vma addr, struct disassemble_info *info)
     int                         line;
     char                buffer[address_buffer_length];
 
-    sprintf(buffer, address_buffer_format, (long long)addr);
+    sprintf(buffer, address_buffer_format, addr);
     (*info->fprintf_func)(info->stream, "0x%s", buffer);
 
 #  define _jit                         disasm_jit
@@ -406,7 +406,7 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length)
            old_line = line;
        }
 
-       bytes = sprintf(buffer, address_buffer_format, (long long)pc);
+       bytes = sprintf(buffer, address_buffer_format, pc);
        (*disasm_info.fprintf_func)(disasm_stream, "%*c0x%s\t",
                                    16 - bytes, ' ', buffer);
        pc += (*disasm_print)(pc, &disasm_info);
index 8912691..2f7f214 100644 (file)
@@ -12,6 +12,55 @@ static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t);
 #define fallback_casx(r0,r1,r2,r3,im)  _fallback_casx(_jit,r0,r1,r2,r3,im)
 static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
                           jit_int32_t, jit_int32_t, jit_word_t);
+#define fallback_clo(r0,r1)            _fallback_clo(_jit,r0,r1)
+static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_clz(r0,r1)            _fallback_clz(_jit,r0,r1)
+static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_cto(r0,r1)            _fallback_cto(_jit,r0,r1)
+static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_ctz(r0,r1)            _fallback_ctz(_jit,r0,r1)
+static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if defined(__ia64__)
+#    define fallback_patch_jmpi(inst,lbl)                              \
+    do {                                                               \
+       sync();                                                         \
+       patch_at(jit_code_jmpi, inst, lbl);                             \
+    } while (0)
+#  else
+#    define fallback_patch_jmpi(inst,lbl) fallback_patch_at(inst,lbl)
+#  endif
+#  if defined(__arm__)
+#    define fallback_patch_at(inst,lbl)        patch_at(arm_patch_jump,inst,lbl)
+#  elif defined(__ia64__)
+#    define fallback_patch_at(inst,lbl)                                        \
+    do {                                                               \
+       sync();                                                         \
+       patch_at(jit_code_bnei, inst, lbl);                             \
+    } while (0);
+#  else
+#    define fallback_patch_at(inst,lbl)        patch_at(inst,lbl)
+#  endif
+#  if defined(__mips__)
+#    define fallback_jmpi(i0)          jmpi(i0,1)
+#  elif defined(__arm__)
+#    define fallback_jmpi(i0)          jmpi_p(i0,1)
+#  elif defined(__s390__) || defined(__s390x__)
+#    define fallback_jmpi(i0)          jmpi(i0,1)
+#  else
+#    define fallback_jmpi(i0)          jmpi(i0)
+#  endif
+#  if defined(__mips__)
+#    define fallback_bnei(i0,r0,i1)    bnei(i0,r0,i1)
+#  elif defined(__s390__) || defined(__s390x__)
+#    define fallback_bnei(i0,r0,i1)    bnei_p(i0,r0,i1)
+#  else
+#    define fallback_bnei(i0,r0,i1)    bnei(i0,r0,i1)
+#  endif
+#  if defined(__s390__) || defined(__s390x__)
+#    define fallback_bmsr(i0,r0,r1)    bmsr_p(i0,r0,r1)
+#  else
+#    define fallback_bmsr(i0,r0,r1)    bmsr(i0,r0,r1)
+#  endif
 #endif
 
 #if CODE
@@ -96,16 +145,20 @@ _fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1)
 {
 #  if defined(__arm__)
     movi(rn(_R0), i1);
-#  elif defined(__ia64__)
-    /* avoid confusion with pushargi patching */
-    if (i1 >= -2097152 && i1 <= 2097151)
-       MOVI(_jitc->rout, i1);
-    else
-       MOVL(_jitc->rout, i1);
 #  elif defined(__hppa__)
     movi(_R26_REGNO, i1);
-#endif
+#  endif
+#  if defined(__arm__)
+    calli(i0, jit_exchange_p());
+#  elif defined(__mips__)
+    calli(i0, 0);
+#  elif defined(__powerpc__) && _CALL_SYSV
+    calli(i0, 0);
+#  elif defined(__s390__) || defined(__s390x__)
+    calli(i0, 0);
+#  else
     calli(i0);
+#  endif
 }
 
 static void
@@ -128,7 +181,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     fallback_load(r2);
     eqr(r0, r0, r2);
     fallback_save(r0);
-    jump = bnei(_jit->pc.w, r0, 1);
+    jump = fallback_bnei(_jit->pc.w, r0, 1);
     fallback_load(r3);
 #  if __WORDSIZE == 32
     str_i(r1, r3);
@@ -136,21 +189,144 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     str_l(r1, r3);
 #  endif
     /* done: */
-#  if defined(__ia64__)
-    sync();
-# endif
     done = _jit->pc.w;
     fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
     fallback_load(r0);
-#  if defined(__arm__)
-    patch_at(arm_patch_jump, jump, done);
-#  elif defined(__ia64__)
-    patch_at(jit_code_bnei, jump, done);
-#  else
-    patch_at(jump, done);
-#  endif
+    fallback_patch_at(jump, done);
     fallback_load_regs(r0);
     if (iscasi)
        jit_unget_reg(r1_reg);
 }
+
+static void
+_fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* emits: r0 = fallback_clz of the comr() of r1 */
+{
+    jit_word_t         clz, done;  /* addresses of the two forward branches patched below */
+    comr(r0, r1);  /* r0 = complement of r1 */
+    clz = fallback_bnei(_jit->pc.w, r0, 0);  /* complement != 0: go to the clz sequence */
+    movi(r0, __WORDSIZE);  /* complement == 0: the result is __WORDSIZE */
+    done = fallback_jmpi(_jit->pc.w);  /* skip the clz sequence */
+    fallback_patch_at(clz, _jit->pc.w);  /* the clz branch lands here */
+    fallback_clz(r0, r0);  /* source and destination are both r0 */
+    fallback_patch_jmpi(done, _jit->pc.w);  /* the done jump lands after the sequence */
+}
+
+static void
+_fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* emits a branching binary search leaving the leading zero count of r1 in r0 */
+{
+    jit_int32_t                r1_reg, r2, r2_reg;
+    jit_word_t         clz, l32, l16, l8, l4, l2, l1;  /* addresses of forward branches patched below */
+    l32 = fallback_bnei(_jit->pc.w, r1, 0);  /* r1 != 0: skip the zero special case */
+    movi(r0, __WORDSIZE);  /* r1 == 0: the result is __WORDSIZE */
+    clz = fallback_jmpi(_jit->pc.w);  /* jump to the end of the sequence */
+    fallback_patch_at(l32, _jit->pc.w);
+    r2_reg = jit_get_reg(jit_class_gpr);  /* r2: scratch register holding the test mask */
+    r2 = rn(r2_reg);
+    r1_reg = jit_get_reg(jit_class_gpr);
+    movr(rn(r1_reg), r1);  /* work on a scratch copy: r0 is written next and may be the same register as r1 */
+    r1 = rn(r1_reg);
+    movi(r0, 0);  /* running count */
+#  if __WORDSIZE == 64
+    movi(r2, 0xffffffff00000000UL);  /* mask: upper 32 bits */
+    l32 = fallback_bmsr(_jit->pc.w, r1, r2);  /* a masked bit is set: skip the shift/add */
+    lshi(r1, r1, 32);  /* upper 32 bits clear: drop them and count them */
+    addi(r0, r0, 32);
+    fallback_patch_at(l32, _jit->pc.w);
+    lshi(r2, r2, 16);  /* mask: upper 16 bits */
+#  else
+    movi(r2, 0xffff0000UL);  /* mask: upper 16 bits */
+#  endif
+    l16 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 16);
+    addi(r0, r0, 16);
+    fallback_patch_at(l16, _jit->pc.w);
+    lshi(r2, r2, 8);  /* mask: upper 8 bits */
+    l8 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 8);
+    addi(r0, r0, 8);
+    fallback_patch_at(l8, _jit->pc.w);
+    lshi(r2, r2, 4);  /* mask: upper 4 bits */
+    l4 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 4);
+    addi(r0, r0, 4);
+    fallback_patch_at(l4, _jit->pc.w);
+    lshi(r2, r2, 2);  /* mask: upper 2 bits */
+    l2 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 2);
+    addi(r0, r0, 2);
+    fallback_patch_at(l2, _jit->pc.w);
+    lshi(r2, r2, 1);  /* mask: top bit only */
+    l1 = fallback_bmsr(_jit->pc.w, r1, r2);
+    addi(r0, r0, 1);  /* last step: only the count is adjusted, no shift */
+    fallback_patch_at(l1, _jit->pc.w);
+    fallback_patch_jmpi(clz, _jit->pc.w);  /* the r1 == 0 path rejoins here */
+    jit_unget_reg(r2_reg);
+    jit_unget_reg(r1_reg);
+}
+
+static void
+_fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* emits: r0 = fallback_ctz of the comr() of r1 */
+{
+    jit_word_t         ctz, done;  /* addresses of the two forward branches patched below */
+    comr(r0, r1);  /* r0 = complement of r1 */
+    ctz = fallback_bnei(_jit->pc.w, r0, 0);  /* complement != 0: go to the ctz sequence */
+    movi(r0, __WORDSIZE);  /* complement == 0: the result is __WORDSIZE */
+    done = fallback_jmpi(_jit->pc.w);  /* skip the ctz sequence */
+    fallback_patch_at(ctz, _jit->pc.w);  /* the ctz branch lands here */
+    fallback_ctz(r0, r0);  /* source and destination are both r0 */
+    fallback_patch_jmpi(done, _jit->pc.w);  /* the done jump lands after the sequence */
+}
+
+static void
+_fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* emits a branching binary search leaving the trailing zero count of r1 in r0 */
+{
+    jit_int32_t                r1_reg, r2, r2_reg;
+    jit_word_t         ctz, l32, l16, l8, l4, l2, l1;  /* addresses of forward branches patched below */
+    l32 = fallback_bnei(_jit->pc.w, r1, 0);  /* r1 != 0: skip the zero special case */
+    movi(r0, __WORDSIZE);  /* r1 == 0: the result is __WORDSIZE */
+    ctz = fallback_jmpi(_jit->pc.w);  /* jump to the end of the sequence */
+    fallback_patch_at(l32, _jit->pc.w);
+    r2_reg = jit_get_reg(jit_class_gpr);  /* r2: scratch register holding the test mask */
+    r2 = rn(r2_reg);
+    r1_reg = jit_get_reg(jit_class_gpr);
+    movr(rn(r1_reg), r1);  /* work on a scratch copy: r0 is written next and may be the same register as r1 */
+    r1 = rn(r1_reg);
+    movi(r0, 0);  /* running count */
+#  if __WORDSIZE == 64
+    movi(r2, 0xffffffffUL);  /* mask: lower 32 bits */
+    l32 = fallback_bmsr(_jit->pc.w, r1, r2);  /* a masked bit is set: skip the shift/add */
+    rshi_u(r1, r1, 32);  /* lower 32 bits clear: drop them and count them */
+    addi(r0, r0, 32);
+    fallback_patch_at(l32, _jit->pc.w);
+    rshi(r2, r2, 16);  /* mask: lower 16 bits */
+#  else
+    movi(r2, 0xffffUL);  /* mask: lower 16 bits */
+#  endif
+    l16 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 16);
+    addi(r0, r0, 16);
+    fallback_patch_at(l16, _jit->pc.w);
+    rshi(r2, r2, 8);  /* mask: lower 8 bits */
+    l8 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 8);
+    addi(r0, r0, 8);
+    fallback_patch_at(l8, _jit->pc.w);
+    rshi(r2, r2, 4);  /* mask: lower 4 bits */
+    l4 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 4);
+    addi(r0, r0, 4);
+    fallback_patch_at(l4, _jit->pc.w);
+    rshi(r2, r2, 2);  /* mask: lower 2 bits */
+    l2 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 2);
+    addi(r0, r0, 2);
+    fallback_patch_at(l2, _jit->pc.w);
+    rshi(r2, r2, 1);  /* mask: lowest bit only */
+    l1 = fallback_bmsr(_jit->pc.w, r1, r2);
+    addi(r0, r0, 1);  /* last step: only the count is adjusted, no shift */
+    fallback_patch_at(l1, _jit->pc.w);
+    fallback_patch_jmpi(ctz, _jit->pc.w);  /* the r1 == 0 path rejoins here */
+    jit_unget_reg(r2_reg);
+    jit_unget_reg(r1_reg);
+}
 #endif
index 013460c..ebb01fd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -648,6 +648,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #define movi_p(r0,i0)          _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define bswapr_us(r0, r1)            _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0, r1)            _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
@@ -663,8 +667,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define extr_uc(r0,r1)         EXTRWR_U(r1,31,8,r0)
 #define extr_s(r0,r1)          EXTRWR(r1,31,16,r0)
 #define extr_us(r0,r1)         EXTRWR_U(r1,31,16,r0)
-#define bswapr_us(r0,r1)       generic_bswapr_us(_jit,r0,r1)
-#define bswapr_ui(r0,r1)       generic_bswapr_ui(_jit,r0,r1)
 #define addr(r0,r1,r2)         ADD(r1,r2,r0)
 #define addi(r0,r1,i0)         _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -912,7 +914,7 @@ static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #define jmpr(r0)               _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define jmpi(i0)               _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #define jmpi_p(i0)             _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #define callr(r0)              _callr(_jit,r0)
@@ -1638,6 +1640,42 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* native bswapr_us: an EXTRWR_U/DEPWR pair replaces the generic helper */
+{
+    jit_int32_t                reg;
+    if (r0 == r1) {  /* aliased operands: the source is still read after r0 has been written */
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);  /* keep the original value in a scratch register */
+       EXTRWR_U(rn(reg), 23, 8, r0);  /* writes r0 (last operand, as in extr_uc) */
+       DEPWR(rn(reg), 23, 8, r0);  /* reads the saved source again; presumably deposits into r0 */
+       jit_unget_reg(reg);
+    }
+    else {
+       EXTRWR_U(r1, 23, 8, r0);  /* same two instructions, reading r1 directly */
+       DEPWR(r1, 23, 8, r0);
+    }
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* native bswapr_ui: a SHRPWI/DEPWR/SHRPWI triple replaces the generic helper */
+{
+    jit_int32_t                reg;
+    if (r0 == r1) {  /* aliased operands: the source is read again by the last instruction */
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);  /* keep the original value in a scratch register */
+       SHRPWI(rn(reg), rn(reg), 16, r0);  /* source paired with itself, amount 16, into r0 */
+       DEPWR(r0, 15, 8, r0);  /* operates on r0 in place */
+       SHRPWI(rn(reg), r0, 8, r0);  /* combines the saved source with the intermediate r0 */
+       jit_unget_reg(reg);
+    }
+    else {
+       SHRPWI(r1, r1, 16, r0);  /* same three instructions, reading r1 directly */
+       DEPWR(r0, 15, 8, r0);
+       SHRPWI(r1, r0, 8, r0);
+    }
+}
+
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2632,17 +2670,19 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     BV_N(_R0_REGNO, r0);
 }
 
-static void
+static jit_word_t  /* returns the address at which the jump was emitted */
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
-    w = ((i0 - _jit->pc.w) >> 2) - 2;
-    if (w >= -32768 && w <= 32767)
-       B_N(w, _R0_REGNO);
+    jit_word_t         d, w;
+    w = _jit->pc.w;  /* address of the jump, handed back to the caller */
+    d = ((i0 - w) >> 2) - 2;  /* distance to i0 in words, less 2 */
+    if (d >= -32768 && d <= 32767)  /* short form only when d is within these bounds */
+       B_N(d, _R0_REGNO);
     else {
-       movi(_R1_REGNO, w);
+       movi(_R1_REGNO, i0);  /* BV_N jumps to the address held in the register (see _jmpr), so load i0, not d */
        BV_N(_R0_REGNO, _R1_REGNO);
     }
+    return (w);
 }
 
 static jit_word_t
index 6b2838d..ed141a7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index 33ac908..e41f89c 100644 (file)
@@ -3,9 +3,10 @@
 #define JIT_INSTR_MAX 196
     0, /* data */
     0, /* live */
-    0, /* align */
+    28,        /* align */
     0, /* save */
     0, /* load */
+    0,  /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     20,        /* va_arg_d */
     8, /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    88,        /* casr */
+    96,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    12,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     40,        /* callr */
     44,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    28,        /* bswapr_us */
-    68,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    88,        /* casr */
-    96,        /* casi */
+    160,       /* clo */
+    140,       /* clz */
+    164,       /* cto */
+    144,       /* ctz */
 #endif /* __WORDSIZE */
index 2c826d8..d3c5ef7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -245,18 +245,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)  /* code: the retr_* variant recorded for this synthesized return */
 {
-    jit_inc_synth_w(retr, u);
+    jit_code_inc_synth_w(code, u);  /* opens the synthesized sequence under the caller-supplied code */
     jit_movr(JIT_RET, u);  /* copy u into JIT_RET */
     jit_ret();
     jit_dec_synth();  /* closes the synthesized sequence */
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -310,7 +310,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)  /* returns jit_arg_reg_p() of the offset recorded in the arg node u */
 {
-    assert(u->code == jit_code_arg ||
+    assert((u->code >= jit_code_arg_c && u->code <= jit_code_arg) ||  /* any code in the arg_c..arg range */
           u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_reg_p(u->u.w));  /* u->u.w is the value stored by the arg constructor */
 }
@@ -343,17 +343,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     _jitc->function->self.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else
        offset = _jitc->function->self.size;
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -406,7 +410,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (v->u.w >= 0)
        jit_extr_c(u, _R26 - v->u.w);
@@ -418,7 +422,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (v->u.w >= 0)
        jit_extr_uc(u, _R26 - v->u.w);
@@ -430,7 +434,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (v->u.w >= 0)
        jit_extr_s(u, _R26 - v->u.w);
@@ -442,7 +446,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (v->u.w >= 0)
        jit_extr_us(u, _R26 - v->u.w);
@@ -454,7 +458,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (v->u.w >= 0)
        jit_movr(u, _R26 - v->u.w);
@@ -464,10 +468,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (v->u.w >= 0)
        jit_movr(_R26 - v->u.w, u);
     else
@@ -476,11 +480,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (v->u.w >= 0)
        jit_movi(_R26 - v->u.w, u);
     else {
@@ -575,10 +579,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     _jitc->function->call.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -591,11 +595,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     _jitc->function->call.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -859,6 +863,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -979,6 +984,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1060,6 +1068,14 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
+#define clor(r0, r1)   fallback_clo(r0, r1)
+#define clzr(r0, r1)   fallback_clz(r0, r1)
+#define ctor(r0, r1)   fallback_cto(r0, r1)
+#define ctzr(r0, r1)   fallback_ctz(r0, r1)
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
@@ -1339,7 +1355,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (word >= -32768 && word <= 32767)
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -1368,6 +1389,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1388,6 +1410,18 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo.
+                    * Note that hppa uses '-' instead of '+' because the
+                    * hppa stack grows up */
+                   undo.func.self.aoff = _jitc->function->frame -
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1409,24 +1443,37 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index 068bc07..98a10c3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -1301,6 +1301,16 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define bitswap(r0, r1)                        _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#define clor(r0, r1)                   _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#define clzr(r0, r1)                   _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#define ctor(r0, r1)                   _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#define ctzr(r0, r1)                   _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #define negr(r0,r1)                    subr(r0,0,r1)
 #define comr(r0,r1)                    ANDCMI(r0,-1,r1)
 #define movr(r0,r1)                    _movr(_jit,r0,r1)
@@ -1500,7 +1510,7 @@ static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define jmpr(r0)                       _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define jmpi(i0)                       _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #define jmpi_p(i0)                     _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #define callr(r0)                      _callr(_jit,r0)
@@ -2456,7 +2466,7 @@ _I9(jit_state_t *_jit, jit_word_t _p,
     TSTREG1(r3);
     TSTPRED(_p);
     TSTREG1(r1);
-    inst((7L<<37)|(1L<<34)|(1L<<34)|(1L<<33)|
+    inst((7L<<37)|(1L<<34)|(1L<<33)|
         (x2<<30)|(1L<<28)|(r3<<20)|(r1<<6)|_p, INST_I);
     SETREG(r1);
 }
@@ -3465,6 +3475,94 @@ _nop(jit_state_t *_jit, jit_int32_t i0)
     assert(i0 == 0);
 }
 
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2;     /* scratch registers: mask, high part, low part */
+    movr(r0, r1);                      /* v = r1; r0 ends up holding v with its 64 bits in reverse order */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), 0x5555555555555555L); /* swap adjacent bits (masks are 64 bit, like the final steps) */
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), 0x3333333333333333L); /* swap adjacent bit pairs */
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), 0x0f0f0f0f0f0f0f0fL); /* swap adjacent nibbles */
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), 0x00ff00ff00ff00ffL); /* swap adjacent bytes */
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), 0x0000ffff0000ffffL); /* swap adjacent 16 bit halves */
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 (32 bit halves swapped) */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.clz)                   /* flag filled in by jit_get_cpu() */
+       CLZ(r0, r1);                    /* single instruction when available */
+    else
+       fallback_clz(r0, r1);           /* otherwise use the generic fallback sequence */
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.clz) {                 /* flag filled in by jit_get_cpu() */
+       comr(r0, r1);                   /* leading ones of r1 are leading zeros of ~r1 */
+       clzr(r0, r0);
+    }
+    else
+       fallback_clo(r0, r1);           /* otherwise use the generic fallback sequence */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.clz) {                 /* flag filled in by jit_get_cpu() */
+       bitswap(r0, r1);                /* reverse the bits so trailing ones become leading ones */
+       clor(r0, r0);
+    }
+    else
+       fallback_cto(r0, r1);           /* otherwise use the generic fallback sequence */
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.clz) {                 /* flag filled in by jit_get_cpu() */
+       bitswap(r0, r1);                /* reverse the bits so trailing zeros become leading zeros */
+       clzr(r0, r0);
+    }
+    else
+       fallback_ctz(r0, r1);           /* otherwise use the generic fallback sequence */
+}
+
 static void
 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -5145,16 +5243,18 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     BR(BR_6);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         d;
+    jit_word_t         d, w;
     sync();
-    d = ((jit_word_t)i0 - _jit->pc.w) >> 4;
+    w = _jit->pc.w;                    /* pc after sync(); this is the value returned */
+    d = ((jit_word_t)i0 - w) >> 4;     /* displacement to i0 in 16 byte units */
     if (d >= -16777216 && d <= 16777215)
        BRI(d);
     else
        BRL(d);
+    return (w);                        /* _emit_code passes this to patch() for not yet resolved jumps */
 }
 
 static jit_word_t
@@ -5400,14 +5500,16 @@ _patch_at(jit_state_t *_jit, jit_code_t code,
            i1  = (ic >> 61) &           0x1L;
            i41 = (ic >> 22) & 0x1ffffffffffL;
            i20 =  ic        &       0xfffffL;
-           assert((tm & ~1) == TM_M_L_X_ &&
+           if (!((tm & ~1) == TM_M_L_X_ &&
                   (s2 & 0xfL<<37) == (0xcL<<37) &&
-                  s0 == nop_m);
+                 s0 == nop_m))
+               goto short_jump;
            s1 = i41;
            s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12);
            s2 |= (i1<<36)|(i20<<13);
            break;
        default:
+       short_jump:
            /* Only B1 in slot 0 expected due to need to either
             * a stop to update predicates, or a sync before
             * unconditional short branch */
index 344977e..f0fb32c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index e65da54..e1d973c 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 224
+#define JIT_INSTR_MAX 608
     0, /* data */
     0, /* live */
-    0, /* align */
+    48,        /* align */
     0, /* save */
     0, /* load */
+    16,        /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     16,        /* va_start */
     32,        /* va_arg */
     32,        /* va_arg_d */
     16,        /* movi */
     16,        /* movnr */
     16,        /* movzr */
+    48,        /* casr */
+    64,        /* casi */
     16,        /* extr_c */
     16,        /* extr_uc */
     16,        /* extr_s */
     16,        /* extr_us */
     16,        /* extr_i */
     16,        /* extr_ui */
+    32,        /* bswapr_us */
+    32,        /* bswapr_ui */
+    16,        /* bswapr_ul */
     32,        /* htonr_us */
     32,        /* htonr_ui */
     16,        /* htonr_ul */
     32,        /* callr */
     48,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     16,        /* movr_d_w */
     32,        /* movi_d_w */
-    32,        /* bswapr_us */
-    32,        /* bswapr_ui */
-    16,        /* bswapr_ul */
-    48,        /* casr */
-    64,        /* casi */
+    608,       /* clo */
+    544,       /* clz */
+    608,       /* cto */
+    544,       /* ctz */
 #endif /* __WORDSIZE */
index 1c35fb1..2968278 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -58,6 +58,7 @@ extern void __clear_cache(void *, void *);
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     /* Always 0 */
     { 0,                "r0"  },
@@ -239,6 +240,11 @@ jit_register_t             _rvs[] = {
 void
 jit_get_cpu(void)
 {
+    jit_word_t         clz = -1;       /* -1 is neither valid result, so the assert catches an unset value */
+    __asm__ volatile("tf.nz.unc p6,p7=32;(p6)mov %0=1;(p7)mov %0=0"
+                    : "=r" (clz));     /* NOTE(review): the asm writes p6/p7 but lists no clobbers - confirm this is safe */
+    assert(clz == 0 || clz == 1);
+    jit_cpu.clz = clz;                 /* read by _clzr/_clor/_ctor/_ctzr to choose CLZ or the fallback */
 }
 
 void
@@ -345,18 +351,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
+    jit_code_inc_synth_w(code, u);     /* synthesized node now carries the caller supplied (typed) retr code */
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -410,9 +416,10 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    assert(u->code == jit_code_arg ||
-          u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_reg_p(u->u.w));
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)  /* same code range _jit_arg() accepts */
+       return (jit_arg_reg_p(u->u.w));
+    assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);    /* anything else must be a float/double argument */
+    return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));       /* NOTE(review): float register args appear to use offsets biased by 8 - confirm */
 }
 
 void
@@ -442,18 +449,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -508,7 +519,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _R32 + v->u.w);
@@ -520,7 +531,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _R32 + v->u.w);
@@ -532,7 +543,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _R32 + v->u.w);
@@ -544,7 +555,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _R32 + v->u.w);
@@ -556,7 +567,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _R32 + v->u.w);
@@ -568,7 +579,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _R32 + v->u.w);
@@ -580,7 +591,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _R32 + v->u.w);
@@ -590,10 +601,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_R32 + v->u.w, u);
     else
@@ -602,11 +613,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_R32 + v->u.w, u);
     else {
@@ -713,10 +724,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_OUT0 + _jitc->function->call.argi, u);
@@ -730,11 +741,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_OUT0 + _jitc->function->call.argi, u);
@@ -973,6 +984,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1122,6 +1134,10 @@ _emit_code(jit_state_t *_jit)
                if (node->u.w > 8)
                    nop(node->u.w - 8);
                break;
+           case jit_code_skip:
+               sync();
+               nop((node->u.w + 7) & ~7);
+               break;
            case jit_code_note:         case jit_code_name:
                sync();
                node->u.w = _jit->pc.w;
@@ -1177,6 +1193,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
            case jit_code_casr:
                casr(rn(node->u.w), rn(node->v.w),
                     rn(node->w.q.l), rn(node->w.q.h));
@@ -1504,7 +1524,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (word  >= -16777216 && word <= 16777215)
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -1533,6 +1558,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1571,6 +1597,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1599,14 +1635,21 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:                case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1614,10 +1657,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index 052d9ac..ab05852 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -31,7 +31,6 @@
 # define _RA_REGNO                     1
 # define _SP_REGNO                     3
 # define _FP_REGNO                     22
-# define stack_framesize               160
 # define ldr(u, v)                     ldr_l(u, v)
 # define ldi(u, v)                     ldi_l(u, v)
 # define ldxi(u, v, w)                 ldxi_l(u, v, w)
@@ -335,6 +334,10 @@ static void _oj26(jit_state_t*, jit_int32_t,jit_int32_t);
 # define nop(i0)                       _nop(_jit, i0)
 # define comr(r0, r1)                  NOR(r0, r1, r1)
 # define negr(r0, r1)                  subr(r0, _ZERO_REGNO, r1)
+# define clor(r0, r1)                  CLO_D(r0, r1)
+# define clzr(r0, r1)                  CLZ_D(r0, r1)
+# define ctor(r0, r1)                  CTO_D(r0, r1)
+# define ctzr(r0, r1)                  CTZ_D(r0, r1)
 static void _nop(jit_state_t*,jit_int32_t);
 # define movr(r0, r1)                  _movr(_jit, r0, r1)
 static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
@@ -580,7 +583,7 @@ static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 # define jmpr(r0)                      JIRL(_ZERO_REGNO, r0, 0)
 # define jmpi(i0)                      _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
 # define jmpi_p(i0)                    _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 # define boaddr(i0, r0, r1)            _boaddr(_jit, i0, r0, r1)
@@ -625,7 +628,7 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 # define callr(r0)                     JIRL(_RA_REGNO, r0, 0)
 # define calli(i0)                     _calli(_jit, i0)
-static void _calli(jit_state_t*, jit_word_t);
+static jit_word_t _calli(jit_state_t*, jit_word_t);
 # define calli_p(i0)                   _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*, jit_word_t);
 # define prolog(i0)                    _prolog(_jit, i0)
@@ -2134,15 +2137,17 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     return (w);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     /* Jump to i0: emits B with a displacement when the range test passes,
      * otherwise defers to jmpi_p(i0).  With this change the function
      * returns a jit_word_t: the value of _jit->pc.w sampled on entry when
      * B is used, or the value returned by jmpi_p() on the fallback path.
      * d is the byte distance from that pc to i0, shifted right by 2.
      * NOTE(review): the si26 range test is applied to the absolute target
      * i0, while the operand handed to B is the displacement d -- confirm
      * that testing i0 rather than d is intended. */
-    jit_word_t         w;
-    w = (i0 - _jit->pc.w) >> 2;
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
     if (can_sign_extend_si26_p(i0))
-       B(w);
+       B(d);
     else
-       (void)jmpi_p(i0);
+       w = jmpi_p(i0);
+    return (w);
 }
 
 static jit_word_t
@@ -2501,15 +2506,17 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     return (w);
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
     /* Call i0: emits BL with a displacement when the range test passes,
      * otherwise defers to calli_p(i0).  With this change the function
      * returns a jit_word_t: the value of _jit->pc.w sampled on entry when
      * BL is used, or the value returned by calli_p() on the fallback path.
      * d is the byte distance from that pc to i0, shifted right by 2.
      * NOTE(review): the si26 range test is applied to the absolute target
      * i0, while the operand handed to BL is the displacement d -- confirm
      * that testing i0 rather than d is intended. */
-    jit_word_t         w;
-    w = (i0 - _jit->pc.w) >> 2;
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
     if (can_sign_extend_si26_p(i0))
-       BL(w);
+       BL(d);
     else
-       (void)calli_p(i0);
+       w = calli_p(i0);
+    return (w);
 }
 
 static jit_word_t
@@ -2527,9 +2534,10 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -2540,44 +2548,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
-    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
-    stxi(0, _SP_REGNO, _RA_REGNO);
-    stxi(8, _SP_REGNO, _FP_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S0))
-       stxi(16, _SP_REGNO, rn(_S0));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       stxi(24, _SP_REGNO, rn(_S1));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       stxi(32, _SP_REGNO, rn(_S2));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       stxi(40, _SP_REGNO, rn(_S3));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       stxi(48, _SP_REGNO, rn(_S4));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       stxi(56, _SP_REGNO, rn(_S5));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       stxi(64, _SP_REGNO, rn(_S6));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       stxi(72, _SP_REGNO, rn(_S7));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       stxi(80, _SP_REGNO, rn(_S8));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       stxi_d(88, _SP_REGNO, rn(_FS0));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       stxi_d(96, _SP_REGNO, rn(_FS1));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       stxi_d(104, _SP_REGNO, rn(_FS2));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       stxi_d(112, _SP_REGNO, rn(_FS3));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       stxi_d(120, _SP_REGNO, rn(_FS4));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       stxi_d(128, _SP_REGNO, rn(_FS5));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       stxi_d(136, _SP_REGNO, rn(_FS6));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       stxi_d(144, _SP_REGNO, rn(_FS7));
-    movr(_FP_REGNO, _SP_REGNO);
+
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+    if (_jitc->function->need_frame) {
+       stxi(0, _SP_REGNO, _RA_REGNO);
+       stxi(8, _SP_REGNO, _FP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _SP_REGNO, rn(iregs[reg]));
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
@@ -2588,7 +2593,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     }
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
-           stxi(stack_framesize - ((8 - reg) * 8),
+           stxi(jit_framesize() - ((8 - reg) * 8),
                 _FP_REGNO, rn(JIT_RA0 - reg));
     }
 }
@@ -2596,46 +2601,31 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 /*
  * Emit the function epilogue; returns without emitting anything when
  * assume_frame is set.
  *
  * Removed (-) lines: SP was always reloaded from FP, RA and FP were always
  * restored from offsets 0 and 8, each register whose bit is set in regset
  * was restored from a fixed offset (16..144), and stack_framesize was
  * always added back to SP.
  *
  * Added (+) lines: SP/RA/FP are restored only when need_frame is set; the
  * registers listed in iregs[] and fregs[] are restored through a running
  * offset that starts at 16 and advances only for registers whose regset
  * bit is set; jit_framesize() is added back to SP only when need_frame or
  * need_stack is set.  The closing JIRL through RA is common to both.
  *
  * NOTE(review): when need_frame is clear SP is not reloaded from FP, so
  * the restores rely on SP still pointing at the base of the save area --
  * confirm that every path which lowers SP further also sets need_frame.
  */
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
-    movr(_SP_REGNO, _FP_REGNO);
-    ldxi(_RA_REGNO, _SP_REGNO, 0);
-    ldxi(_FP_REGNO, _SP_REGNO, 8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S0))
-       ldxi(rn(_S0), _SP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       ldxi(rn(_S1), _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       ldxi(rn(_S2), _SP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       ldxi(rn(_S3), _SP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       ldxi(rn(_S4), _SP_REGNO, 48);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       ldxi(rn(_S5), _SP_REGNO, 56);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       ldxi(rn(_S6), _SP_REGNO, 64);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       ldxi(rn(_S7), _SP_REGNO, 72);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       ldxi(rn(_S8), _SP_REGNO, 80);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       ldxi_d(rn(_FS0), _SP_REGNO, 88);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       ldxi_d(rn(_FS1), _SP_REGNO, 96);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       ldxi_d(rn(_FS2), _SP_REGNO, 104);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       ldxi_d(rn(_FS3), _SP_REGNO, 112);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       ldxi_d(rn(_FS4), _SP_REGNO, 120);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       ldxi_d(rn(_FS5), _SP_REGNO, 128);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       ldxi_d(rn(_FS6), _SP_REGNO, 136);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       ldxi_d(rn(_FS7), _SP_REGNO, 144);
-    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    if (_jitc->function->need_frame) {
+       movr(_SP_REGNO, _FP_REGNO);
+       ldxi(_RA_REGNO, _SP_REGNO, 0);
+       ldxi(_FP_REGNO, _SP_REGNO, 8);
+    }
+
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       addi(_SP_REGNO, _SP_REGNO, jit_framesize());
     JIRL(_ZERO_REGNO, _RA_REGNO, 0);
 }
 
@@ -2645,9 +2635,9 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     assert(_jitc->function->self.call & jit_call_varargs);
     /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
-       addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+       addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
     else
-       addi(r0, _FP_REGNO, _jitc->function->self.size);
+       addi(r0, _FP_REGNO, jit_selfsize());
 }
 
 static void
index 5874afd..2871de3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index 2490cfa..4b95047 100644 (file)
@@ -5,6 +5,7 @@
     28,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     16,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    32,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
-    16,        /* ldi_c */
+    20,        /* ldi_c */
     4, /* ldr_uc */
-    16,        /* ldi_uc */
+    20,        /* ldi_uc */
     4, /* ldr_s */
-    16,        /* ldi_s */
+    20,        /* ldi_s */
     4, /* ldr_us */
-    16,        /* ldi_us */
+    20,        /* ldi_us */
     4, /* ldr_i */
-    16,        /* ldi_i */
+    20,        /* ldi_i */
     4, /* ldr_ui */
-    16,        /* ldi_ui */
+    20,        /* ldi_ui */
     4, /* ldr_l */
-    16,        /* ldi_l */
+    20,        /* ldi_l */
     4, /* ldxr_c */
     16,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_l */
     16,        /* ldxi_l */
     4, /* str_c */
-    16,        /* sti_c */
+    20,        /* sti_c */
     4, /* str_s */
-    16,        /* sti_s */
+    20,        /* sti_s */
     4, /* str_i */
-    16,        /* sti_i */
+    20,        /* sti_i */
     4, /* str_l */
-    16,        /* sti_l */
+    20,        /* sti_l */
     4, /* stxr_c */
     16,        /* stxi_c */
     4, /* stxr_s */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* movr_f */
     8, /* movi_f */
     4, /* ldr_f */
-    16,        /* ldi_f */
+    20,        /* ldi_f */
     4, /* ldxr_f */
     16,        /* ldxi_f */
     4, /* str_f */
-    16,        /* sti_f */
+    20,        /* sti_f */
     4, /* stxr_f */
     16,        /* stxi_f */
     8, /* bltr_f */
     4, /* movr_d */
     16,        /* movi_d */
     4, /* ldr_d */
-    16,        /* ldi_d */
+    20,        /* ldi_d */
     4, /* ldxr_d */
     16,        /* ldxi_d */
     4, /* str_d */
-    16,        /* sti_d */
+    20,        /* sti_d */
     4, /* stxr_d */
     16,        /* stxi_d */
     8, /* bltr_d */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
-    8, /* bswapr_us */
-    8, /* bswapr_ui */
-    4, /* bswapr_ul */
-    32,        /* casr */
-    44,        /* casi */
+    4, /* clo */
+    4, /* clz */
+    4, /* cto */
+    4, /* ctz */
 #endif /* __WORDSIZE */
index 78fac47..c9b5b8c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* callee save                         + variadic arguments
+ * align16(ra+fp+s[0-8]+fs[0-7])       +       align16(a[0-7]) */
 /* NOTE(review): ra, fp, s0-s8 and fs0-fs7 are 19 slots; assuming 8 bytes
  * per slot that is 152, which aligns up to 160 rather than 144 -- confirm
  * how the 144 term was derived.  The 64 term matches eight 8-byte slots
  * for a0-a7. */
+#define stack_framesize                        (144 + 64)
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
@@ -28,6 +32,8 @@ typedef struct jit_pointer_t jit_va_list_t;
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
@@ -107,6 +113,14 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
     /* Integer registers _S0.._S8.  The callee-save loops shown for
      * _prolog/_epilog index an array of this name and give each register
      * whose regset bit is set the next word-sized stack slot, in this
      * order. */
+    _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8
+};
+
+static jit_int32_t fregs[] = {
     /* Floating-point registers _FS0.._FS7.  The callee-save loops shown
      * for _prolog/_epilog index an array of this name after the integer
      * list and give each register whose regset bit is set the next
      * jit_float64_t-sized stack slot, in this order. */
+    _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7
+};
+
 /*
  * Implementation
  */
@@ -167,6 +181,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -215,20 +230,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 /*
  * Return the value held in register u: move it into JIT_RET and emit
  * jit_ret(), bracketed by the synth inc/dec calls.
  * The + lines add a `code` parameter that is forwarded to
  * jit_code_inc_synth_w() in place of the fixed retr code, and issue the
  * jit_movr() unconditionally; the - lines drop the JIT_RET != u test and
  * the jit_live(JIT_RET) call.
  * NOTE(review): presumably jit_movr() itself copes with u == JIT_RET and
  * liveness is now tracked elsewhere -- confirm.
  */
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -288,16 +301,17 @@ _jit_epilog(jit_state_t *_jit)
 /*
  * Report whether argument node u lives in a register.
  * Nodes whose code lies in the range jit_code_arg_c..jit_code_arg (was:
  * exactly jit_code_arg) are tested with jit_arg_reg_p(u->u.w).  Any other
  * node must be jit_code_arg_f or jit_code_arg_d (asserted); with the +
  * line it counts as a register argument either when u->u.w is a float
  * argument register index or when u->u.w - 8 is an integer argument
  * register index, the same u.w - 8 test used by the float/double
  * getarg/putarg paths in this diff.
  */
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_f_reg_p(u->u.w));
+    return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 }
 
 void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
@@ -321,19 +335,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -355,6 +373,7 @@ _jit_arg_f(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -378,6 +397,7 @@ _jit_arg_d(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -388,111 +408,129 @@ _jit_arg_d(jit_state_t *_jit)
 /*
  * Fetch argument v into register u (getarg_c).
  * When jit_arg_reg_p(v->u.w) holds, the value is taken from _A0 - v->u.w
  * with jit_extr_c; otherwise it is loaded with jit_ldxi_c using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_c) and pass the load node to
  * jit_link_alist() (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch argument v into register u (getarg_uc).
  * When jit_arg_reg_p(v->u.w) holds, the value is taken from _A0 - v->u.w
  * with jit_extr_uc; otherwise it is loaded with jit_ldxi_uc using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_c) -- the same arg code as the
  * signed variant -- and pass the load node to jit_link_alist()
  * (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch argument v into register u (getarg_s).
  * When jit_arg_reg_p(v->u.w) holds, the value is taken from _A0 - v->u.w
  * with jit_extr_s; otherwise it is loaded with jit_ldxi_s using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_s) and pass the load node to
  * jit_link_alist() (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch argument v into register u (getarg_us).
  * When jit_arg_reg_p(v->u.w) holds, the value is taken from _A0 - v->u.w
  * with jit_extr_us; otherwise it is loaded with jit_ldxi_us using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_s) -- the same arg code as the
  * signed variant -- and pass the load node to jit_link_alist()
  * (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch argument v into register u (getarg_i).
  * When jit_arg_reg_p(v->u.w) holds, the value is taken from _A0 - v->u.w
  * with jit_extr_i; otherwise it is loaded with jit_ldxi_i using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_i) and pass the load node to
  * jit_link_alist() (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _A0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch argument v into register u (getarg_ui).
  * When jit_arg_reg_p(v->u.w) holds, the value is taken from _A0 - v->u.w
  * with jit_extr_ui; otherwise it is loaded with jit_ldxi_ui using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_i) -- the same arg code as the
  * signed variant -- and pass the load node to jit_link_alist()
  * (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch argument v into register u (getarg_l).
  * When jit_arg_reg_p(v->u.w) holds, the value is copied from
  * _A0 - v->u.w with a plain jit_movr (no extr step, unlike the narrower
  * getarg variants); otherwise it is loaded with jit_ldxi_l using base
  * JIT_FP and offset v->u.w.  The + lines replace the plain assert with
  * assert_arg_type(v->code, jit_code_arg_l) and pass the load node to
  * jit_link_alist() (presumably so patch_alist can revisit it -- confirm).
  */
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 /*
  * Store register u into argument v.
  * When jit_arg_reg_p(v->u.w) holds, u is copied to _A0 - v->u.w with
  * jit_movr; otherwise it is stored with jit_stxi at offset v->u.w from
  * JIT_FP.  The + lines add a `code` parameter, which is checked against
  * v->code by assert_putarg_type() and forwarded to
  * jit_code_inc_synth_wp() in place of the fixed putargr code, and pass
  * the store node to jit_link_alist() (presumably so patch_alist can
  * revisit it -- confirm).
  */
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+       node = jit_stxi(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -507,8 +545,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, _FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -521,8 +561,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -534,18 +576,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float32_t       f;
-           jit_int32_t         i;
-       } uu;
-       uu.f = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.i);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
+       node = jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -560,8 +598,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(u, _FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -574,8 +614,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -587,28 +629,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_d(_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float64_t       d;
-           jit_int64_t         w;
-       } uu;
-       uu.d = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
+       node = jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_A0 - _jitc->function->call.argi, u);
@@ -617,16 +655,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_A0 - _jitc->function->call.argi, u);
@@ -638,6 +677,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -660,6 +700,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -686,6 +727,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -708,6 +750,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -734,6 +777,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -761,6 +805,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -778,6 +823,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -877,6 +923,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1015,6 +1062,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1062,6 +1112,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1383,6 +1437,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1393,14 +1448,22 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (can_sign_extend_si26_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -1411,22 +1474,32 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (can_sign_extend_si26_p(word))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    calli(node->u.w);
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
@@ -1442,10 +1515,25 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* this will be recomputed but undo anyway to have it
+                    * better self documented.*/
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                if (node->link && (word = _jit->pc.w & 3))
@@ -1488,11 +1576,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:                case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1500,10 +1595,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1600,6 +1707,27 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+/*
+ * Compute the size of the stack frame of the current function and
+ * store it in _jitc->framesize.
+ */
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                idx;
+
+    /* Fixed part: ra+fp */
+    _jitc->framesize = 16;
+
+    /* One word for each entry of iregs[] set in the function regset */
+    for (idx = 0; idx < jit_size(iregs); idx++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[idx]))
+           _jitc->framesize += sizeof(jit_word_t);
+    }
+
+    /* One double for each entry of fregs[] set in the function regset */
+    for (idx = 0; idx < jit_size(fregs); idx++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[idx]))
+           _jitc->framesize += sizeof(jit_float64_t);
+    }
+
+    /* Space to store variadic arguments: 8 bytes times (8 - vagp) */
+    if (_jitc->function->self.call & jit_call_varargs)
+       _jitc->framesize += 8 * (8 - _jitc->function->vagp);
+
+    /* Round up to a multiple of 16, so that functions called have a
+     * 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & ~15;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index e4e5deb..8e736da 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index f52d6dc..0b1b3b4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -27,11 +27,11 @@ typedef union {
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } ft;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } rd;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } fs;
+    struct {   jit_uint32_t _: 7;      jit_uint32_t b :  9; } i9;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } ic;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } fd;
-    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 10; } tr;
-    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 20; } br;
     struct {                           jit_uint32_t b :  6; } tc;
+    struct {                           jit_uint32_t b :  5; } cn;
     struct {                           jit_uint32_t b : 11; } cc;
     struct {                           jit_uint32_t b : 16; } is;
     struct {                           jit_uint32_t b : 26; } ii;
@@ -43,22 +43,19 @@ typedef union {
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } ft;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } rd;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } fs;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  9; } i9;
     struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } ic;
     struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } fd;
-    struct {   jit_uint32_t _:21;      jit_uint32_t b : 10; } tr;
-    struct {   jit_uint32_t _:21;      jit_uint32_t b : 20; } br;
     struct {   jit_uint32_t _:26;      jit_uint32_t b :  6; } tc;
+    struct {   jit_uint32_t _:27;      jit_uint32_t b :  5; } cn;
     struct {   jit_uint32_t _:21;      jit_uint32_t b : 11; } cc;
     struct {   jit_uint32_t _:16;      jit_uint32_t b : 16; } is;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b : 26; } ii;
 #endif
     int                                        op;
 } jit_instr_t;
-#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
-#  define jit_mips2_p()                        1
-#else
-#  define jit_mips2_p()                        0
-#endif
+#define jit_mips2_p()                  (jit_cpu.release >= 2)
+#define jit_mips6_p()                  (jit_cpu.release >= 6)
 #  define _ZERO_REGNO                  0
 #  define _T0_REGNO                    0x08
 #  define _T1_REGNO                    0x09
@@ -90,24 +87,20 @@ typedef union {
 #  define _F28_REGNO                   28
 #  define _F30_REGNO                   30
 #  if __WORDSIZE == 32
-#    if NEW_ABI
-#      define stack_framesize          144
-#    else
-#      define stack_framesize          112
-#    endif
 #    define ldr(u,v)                   ldr_i(u,v)
 #    define ldi(u,v)                   ldi_i(u,v)
 #    define ldxi(u,v,w)                        ldxi_i(u,v,w)
 #    define sti(u,v)                   sti_i(u,v)
 #    define stxi(u,v,w)                        stxi_i(u,v,w)
 #  else
-#    define stack_framesize            144
 #    define ldr(u,v)                   ldr_l(u,v)
 #    define ldi(u,v)                   ldi_l(u,v)
 #    define ldxi(u,v,w)                        ldxi_l(u,v,w)
 #    define sti(u,v)                   sti_l(u,v)
 #    define stxi(u,v,w)                        stxi_l(u,v,w)
 #  endif
+/* can_relative_jump_p(im) => can_sign_extend_short_p(im >> 2), for word aligned im */
+#  define can_relative_jump_p(im)      ((im) >= -130712 && (im) <= 131068)
 #  define can_sign_extend_short_p(im)  ((im) >= -32678 && (im) <= 32767)
 #  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
 #  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0)
@@ -195,6 +188,8 @@ typedef union {
 #  define MIPS_CT                      0x06
 #  define MIPS_MTH                     0x07
 #  define MIPS_BC                      0x08
+#  define MIPS_BC1EQZ                  0x09    /* release 6 */
+#  define MIPS_BC1NEZ                  0x0d    /* release 6 */
 #  define MIPS_WRPGPR                  0x0e
 #  define MIPS_BGZAL                   0x11
 #  define MIPS_MFMC0                   0x11
@@ -303,17 +298,32 @@ typedef union {
 #  define MIPS_DSRA32                  0x3f
 #  define MIPS_SDBPP                   0x3f
 #  define ii(i)                                *_jit->pc.ui++ = i
+#  define instr(op)                    _instr(_jit, op)
+static void _instr(jit_state_t*, jit_int32_t);
+#  define flush()                      _flush(_jit)
+static void _flush(jit_state_t*);
+#  define pending()                    _pending(_jit)
+static jit_int32_t _pending(jit_state_t*);
+#  define delay(op)                    _delay(_jit,op)
+static void _delay(jit_state_t*,jit_int32_t);
+#  define jit_get_reg_for_delay_slot(mask, r0,r1)                      \
+       _jit_get_reg_for_delay_slot(_jit,mask,r0,r1)
+static jit_int32_t _jit_get_reg_for_delay_slot(jit_state_t*,jit_int32_t,
+                                              jit_int32_t, jit_int32_t);
+#  define hrrrit(hc,rs,rt,rd,im,tc)    _hrrrit(_jit,hc,rs,rt,rd,im,tc)
 static void
 _hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
        jit_int32_t,jit_int32_t);
-#  define hrrrit(hc,rs,rt,rd,im,tc)    _hrrrit(_jit,hc,rs,rt,rd,im,tc)
 #  define hrrr_t(hc,rs,rt,rd,tc)       hrrrit(hc,rs,rt,rd,0,tc)
 #  define rrr_t(rs,rt,rd,tc)           hrrr_t(0,rs,rt,rd,tc)
 #  define hrri(hc,rs,rt,im)            _hrri(_jit,hc,rs,rt,im)
 static void _hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define hrri9(hc,rs,rt,i9,tc)                _hrri9(_jit,hc,rs,rt,i9,tc)
+static void _hrri9(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
 #  define hi(hc,im)                    _hi(_jit,hc,im)
 static void _hi(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define NOP(i0)                      ii(0)
+#  define NOP(i0)                      instr(0)
 #  define nop(i0)                      _nop(_jit,i0)
 static void _nop(jit_state_t*,jit_int32_t);
 #  define h_ri(hc,rt,im)               _hrri(_jit,hc,0,rt,im)
@@ -327,13 +337,29 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define DSUBU(rd,rs,rt)              rrr_t(rs,rt,rd,MIPS_DSUBU)
 #  define MUL(rd,rs,rt)                        hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL)
 #  define MULT(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
+#  define MUL_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 24)
+#  define MUH_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 24)
 #  define MULTU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
+#  define MULU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 25)
+#  define MUHU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 25)
 #  define DMULT(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
+#  define DMUL_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 28)
+#  define DMUH_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 28)
 #  define DMULTU(rs,rt)                        rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU)
+#  define DMULU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 29)
+#  define DMUHU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 29)
 #  define DIV(rs,rt)                   rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV)
+#  define DIV_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 26)
+#  define MOD_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 26)
 #  define DIVU(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU)
+#  define DIVU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 27)
+#  define MODU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 27)
 #  define DDIV(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV)
+#  define DDIV_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 30)
+#  define DMOD_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 30)
 #  define DDIVU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU)
+#  define DDIVU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 31)
+#  define DMODU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 31)
 #  define SLLV(rd,rt,rs)               rrr_t(rs,rt,rd,MIPS_SLLV)
 #  define SLL(rd,rt,sa)                        rrit(rt,rd,sa,MIPS_SLL)
 #  define DSLLV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSLLV)
@@ -368,6 +394,7 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define ANDI(rt,rs,im)               hrri(MIPS_ANDI,rs,rt,im)
 #  define OR(rd,rs,rt)                 rrr_t(rs,rt,rd,MIPS_OR)
 #  define ORI(rt,rs,im)                        hrri(MIPS_ORI,rs,rt,im)
+#  define NOR(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_NOR)
 #  define XOR(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_XOR)
 #  define XORI(rt,rs,im)               hrri(MIPS_XORI,rs,rt,im)
 #  define LB(rt,of,rb)                 hrri(MIPS_LB,rb,rt,of)
@@ -378,13 +405,17 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define LWU(rt,of,rb)                        hrri(MIPS_LWU,rb,rt,of)
 #  define LD(rt,of,rb)                 hrri(MIPS_LD,rb,rt,of)
 #  define LL(rt,of,rb)                 hrri(MIPS_LL,rb,rt,of)
+#  define LL_R6(rt,of,rb)              hrri9(MIPS_SPECIAL3,rb,rt,of,54)
 #  define LLD(rt,of,rb)                        hrri(MIPS_LLD,rb,rt,of)
+#  define LLD_R6(rt,of,rb)             hrri9(MIPS_SPECIAL3,rb,rt,of,55)
 #  define SB(rt,of,rb)                 hrri(MIPS_SB,rb,rt,of)
 #  define SH(rt,of,rb)                 hrri(MIPS_SH,rb,rt,of)
 #  define SW(rt,of,rb)                 hrri(MIPS_SW,rb,rt,of)
 #  define SD(rt,of,rb)                 hrri(MIPS_SD,rb,rt,of)
 #  define SC(rt,of,rb)                 hrri(MIPS_SC,rb,rt,of)
+#  define SC_R6(rt,of,rb)              hrri9(MIPS_SPECIAL3,rb,rt,of,38)
 #  define SCD(rt,of,rb)                        hrri(MIPS_SCD,rb,rt,of)
+#  define SCD_R6(rt,of,rb)             hrri9(MIPS_SPECIAL3,rb,rt,of,39)
 #  define WSBH(rd,rt)                  hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
 #  define SEB(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
 #  define SEH(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
@@ -398,34 +429,73 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define BGEZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im)
 #  define BGTZ(rs,im)                  hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im)
 #  define BNE(rs,rt,im)                        hrri(MIPS_BNE,rs,rt,im)
+#  define BGEZAL(rs,im)                        hrri(MIPS_REGIMM,rs,MIPS_BGEZAL,im)
 #  define JALR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR)
-#  if 1 /* supports MIPS32 R6 */
-#   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
-#  else /* does not support MIPS32 R6 */
-#   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
+#  if 1                /* This should work for mips r6 or older */
+#    define JR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
+#  else                /* This should generate an illegal instruction in mips r6 */
+#    define JR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
 #  endif
+#  define CLO_R6(rd,rs)                        hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x11)
+#  define DCLO_R6(rd,rs)               hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x13)
+#  define CLZ_R6(rd,rs)                        hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x10)
+#  define DCLZ_R6(rd,rs)               hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x12)
+#  define BITSWAP(rd,rt)               hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x20)
+#  define DBITSWAP(rd,rt)              hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x24)
+#  define CLO(rd,rs)                   hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLO)
+#  define DCLO(rd,rs)                  hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLO)
+#  define CLZ(rd,rs)                   hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLZ)
+#  define DCLZ(rd,rs)                  hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLZ)
 #  define J(i0)                                hi(MIPS_J,i0)
 #  define JAL(i0)                      hi(MIPS_JAL,i0)
 #  define MOVN(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVN)
 #  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
+#  define SELEQZ(rd,rs,rt)             hrrrit(0,rs,rt,rd,0,53)
+#  define SELNEZ(rd,rs,rt)             hrrrit(0,rs,rt,rd,0,55)
 #  define comr(r0,r1)                  xori(r0,r1,-1)
 #  define negr(r0,r1)                  subr(r0,_ZERO_REGNO,r1)
+/* No ';' in the expansion, so that it is a plain expression like clor() etc */
+#  define bitswap(r0,r1)               _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  if __WORDSIZE == 32
 #    define addr(rd,rs,rt)             ADDU(rd,rs,rt)
 #    define addiu(r0,r1,i0)            ADDIU(r0,r1,i0)
 #    define subr(rd,rs,rt)             SUBU(rd,rs,rt)
 #    define mult(rs,rt)                        MULT(rs,rt)
+#    define mul_r6(rd,rs,rt)           MUL_R6(rd,rs,rt)
+#    define muh_r6(rd,rs,rt)           MUH_R6(rd,rs,rt)
 #    define multu(rs,rt)               MULTU(rs,rt)
+#    define mulu_r6(rd,rs,rt)          MULU_R6(rd,rs,rt)
+#    define muhu_r6(rd,rs,rt)          MUHU_R6(rd,rs,rt)
 #    define div(rs,rt)                 DIV(rs,rt)
 #    define divu(rs,rt)                        DIVU(rs,rt)
+#    define div_r6(rd,rs,rt)           DIV_R6(rd,rs,rt)
+#    define divu_r6(rd,rs,rt)          DIVU_R6(rd,rs,rt)
+#    define mod_r6(rd,rs,rt)           MOD_R6(rd,rs,rt)
+#    define modu_r6(rd,rs,rt)          MODU_R6(rd,rs,rt)
 #  else
 #    define addr(rd,rs,rt)             DADDU(rd,rs,rt)
 #    define addiu(r0,r1,i0)            DADDIU(r0,r1,i0)
 #    define subr(rd,rs,rt)             DSUBU(rd,rs,rt)
 #    define mult(rs,rt)                        DMULT(rs,rt)
+#    define mul_r6(rd,rs,rt)           DMUL_R6(rd,rs,rt)
+#    define muh_r6(rd,rs,rt)           DMUH_R6(rd,rs,rt)
 #    define multu(rs,rt)               DMULTU(rs,rt)
+#    define mulu_r6(rd,rs,rt)          DMULU_R6(rd,rs,rt)
+#    define muhu_r6(rd,rs,rt)          DMUHU_R6(rd,rs,rt)
 #    define div(rs,rt)                 DDIV(rs,rt)
 #    define divu(rs,rt)                        DDIVU(rs,rt)
+#    define div_r6(rd,rs,rt)           DDIV_R6(rd,rs,rt)
+#    define divu_r6(rd,rs,rt)          DDIVU_R6(rd,rs,rt)
+#    define mod_r6(rd,rs,rt)           DMOD_R6(rd,rs,rt)
+#    define modu_r6(rd,rs,rt)          DMODU_R6(rd,rs,rt)
 #  endif
 #  define extr(rd,rt,lsb,nb)   _extr(_jit,rd,rt,lsb,nb)
 static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
@@ -526,8 +596,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-#  define movnr(r0,r1,r2)              MOVN(r0, r1, r2)
-#  define movzr(r0,r1,r2)              MOVZ(r0, r1, r2)
+#  define movnr(r0, r1, r2)            _movnr(_jit, r0, r1, r2)
+static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define movzr(r0, r1, r2)            _movzr(_jit, r0, r1, r2)
+static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
                  jit_int32_t,jit_int32_t,jit_word_t);
@@ -672,50 +744,44 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
-static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
-static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
-static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
-static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
-static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
-static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
-static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
-static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1)                 bger(i0,r1,r0)
+#define bltr_u(i0,r0,r1)               bger_u(i0,r1,r0)
+#define blti(i0,r0,i1)                 _bgei(_jit,i0,r0,i1,0,1)
+#define blti_u(i0,r0,i1)               _bgei(_jit,i0,r0,i1,1,1)
+#define bler(i0,r0,r1)                 _bgtr(_jit,i0,r1,r0,0,1)
+#define bler_u(i0,r0,r1)               _bgtr(_jit,i0,r1,r0,1,1)
+#define blei(i0,r0,i1)                 _bgti(_jit,i0,r0,i1,0,1)
+#define blei_u(i0,r0,i1)               _bgti(_jit,i0,r0,i1,1,1)
 #define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
 static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
 static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
-static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
-static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
-static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
-static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
-static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
-static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
-static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
-static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1,0)
+#define bger_u(i0,r0,r1)               _bger(_jit,i0,r0,r1,1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                       jit_bool_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1,0,0)
+#define bgei_u(i0,r0,i1)               _bgei(_jit,i0,r0,i1,1,0)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                       jit_bool_t,jit_bool_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1,0,0)
+#define bgtr_u(i0,r0,r1)               _bgtr(_jit,i0,r0,r1,1,0)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                       jit_bool_t,jit_bool_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1,0,0)
+#define bgti_u(i0,r0,i1)               _bgti(_jit,i0,r0,i1,1,0)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                       jit_bool_t,jit_bool_t);
 #define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
 static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
 static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
-#  define jmpi(i0)                     _jmpi(_jit,i0)
-static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi(i0,patch)               _jmpi(_jit,i0,patch)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define boaddr(i0,r0,r1)             _boaddr(_jit,i0,r0,r1)
 static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define boaddi(i0,r0,i1)             _boaddi(_jit,i0,r0,i1)
@@ -758,8 +824,8 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+#  define calli(i0,i1)                 _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_bool_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit,node)
@@ -774,9 +840,584 @@ static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
 static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
 #define patch_at(jump,label)           _patch_at(_jit,jump,label)
 static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+/* definitions used by jit_get_reg_for_delay_slot() */
+#include "jit_mips-fpu.c"
 #endif
 
 #if CODE
+/*
+ * Queue "op" as the pending instruction.  If another instruction was
+ * already pending it is written out with ii() first; otherwise the
+ * pending flag is just raised.  Either way "op" itself is not written
+ * to the code buffer by this call.
+ */
+static void
+_instr(jit_state_t *_jit, jit_int32_t op)
+{
+    if (!_jitc->inst.pend)
+       _jitc->inst.pend = 1;
+    else
+       ii(_jitc->inst.op);
+    /* "op" is now the one held back */
+    _jitc->inst.op = op;
+}
+
+/*
+ * Write out the pending instruction, if there is one, and clear the
+ * pending flag.  Does nothing when no instruction is pending.
+ */
+static void
+_flush(jit_state_t *_jit)
+{
+    if (!_jitc->inst.pend)
+       return;
+    ii(_jitc->inst.op);
+    _jitc->inst.pend = 0;
+}
+
+/*
+ * Take the pending instruction without writing it to the code buffer.
+ * Returns its encoding and clears the pending flag, or returns 0 when
+ * nothing was pending.
+ */
+static jit_int32_t
+_pending(jit_state_t *_jit)
+{
+    jit_int32_t                op = 0;
+    if (_jitc->inst.pend) {
+       _jitc->inst.pend = 0;
+       op = _jitc->inst.op;
+    }
+    return (op);
+}
+
+/*
+ * Write the pending instruction immediately followed by "op", so that
+ * "op" occupies the slot right after it.  An instruction must be
+ * pending (asserted); the pending flag is cleared.
+ */
+static void
+_delay(jit_state_t *_jit, jit_int32_t op)
+{
+    jit_int32_t                first;
+    assert(_jitc->inst.pend);
+    first = _jitc->inst.op;
+    _jitc->inst.pend = 0;
+    ii(first);
+    ii(op);
+}
+
+/*
+ * Get a temporary register of class "mask" that does not clash with the
+ * instruction currently held back as pending.
+ *
+ * The pending instruction (or, when nothing is pending, the last word
+ * written to the code buffer, or 0 if the buffer is empty) is decoded
+ * and up to three register numbers it encodes are stored in regs[].
+ * Only registers of the class requested in "mask" are recorded; for
+ * gpr requests unused entries are set to 0.  Unknown encodings abort().
+ *
+ * reg0/reg1 are register numbers the caller is about to use.  If an
+ * instruction is pending and either of them matches an entry of regs[]
+ * the pending instruction is flushed.  For gpr requests a value of 0
+ * in reg0/reg1 is ignored.
+ *
+ * A register is then requested with jit_class_nospill.  While an
+ * instruction is still pending, a register matching regs[] is rejected
+ * and another one requested (at most three times); rejected registers
+ * are released again.  If no register could be obtained the pending
+ * instruction is flushed and, unless "mask" has jit_class_chk, the
+ * request is retried.
+ *
+ * Returns the register, or JIT_NOREG only if jit_class_chk was given.
+ */
+static jit_int32_t
+_jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
+                           jit_int32_t reg0, jit_int32_t reg1)
+{
+    jit_instr_t                i;
+    jit_int32_t                reg, r0, r1, r2, regs[3];
+    /* If will emit a pending instruction */
+    if (_jitc->inst.pend)
+       i.op = _jitc->inst.op;
+    /* Else if at least one instruction emited, check it */
+    else if (_jit->pc.uc > _jit->code.ptr)
+       i.op = _jit->pc.ui[-1];
+    /* Else, a nop */
+    else
+       i.op = 0;
+    regs[0] = regs[1] = regs[2] = -1;
+    switch (i.hc.b) {
+       case MIPS_SPECIAL:              /* 00 */
+           switch (i.tc.b) {
+               case MIPS_SLLV:         /* 04 */
+               case MIPS_SRLV:         /* 06 */
+               case MIPS_SRAV:         /* 07 */
+               case MIPS_DSLLV:        /* 14 */
+               case MIPS_DSRLV:        /* 16 */
+               case MIPS_DSRAV:        /* 17 */
+               case MIPS_ADDU:         /* 21 */
+               case MIPS_SUBU:         /* 23 */
+               case MIPS_AND:          /* 24 */
+               case MIPS_OR:           /* 25 */
+               case MIPS_XOR:          /* 26 */
+               case MIPS_NOR:          /* 27 */
+               case MIPS_SLT:          /* 2a */
+               case MIPS_SLTU:         /* 2b */
+               case MIPS_DADDU:        /* 2d */
+               case MIPS_DSUBU:        /* 2f */
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+                   /* MUL MUH */
+               case MIPS_MULT:         /* 18 */
+                   /* MULU MUHU */
+               case MIPS_MULTU:        /* 19 */
+                   /* DIV MOD */
+               case MIPS_DIV:          /* 1a */
+                   /* DIVU MODU */
+               case MIPS_DIVU:         /* 1b */
+                   /* DMUL DMUH */
+               case MIPS_DMULT:        /* 1c */
+                   /* DMULU DMUHU */
+               case MIPS_DMULTU:       /* 1d */
+                   /* DDIV DMOD */
+               case MIPS_DDIV:         /* 1e */
+                   /* DDIVU DMODU */
+               case MIPS_DDIVU:        /* 1f */
+                   if (jit_mips6_p()) {
+                       assert(i.ic.b == 2 || i.ic.b == 3);
+                       if (mask & jit_class_gpr) {
+                           regs[0] = i.rs.b;
+                           regs[1] = i.rt.b;
+                           regs[2] = i.rd.b;
+                       }
+                   }
+                   else {
+                       assert(i.rd.b == 0);
+                       if (mask & jit_class_gpr) {
+                           regs[0] = i.rs.b;
+                           regs[1] = i.rt.b;
+                           regs[2] = 0;
+                       }
+                   }
+                   break;
+                   /* CLZ */
+               case MIPS_MFHI:         /* 10 */
+                   /* CLO */
+               case MIPS_MTHI:         /* 11 */
+                   /* DCLZ */
+               case MIPS_MFLO:         /* 12 */
+                   /* DCLO */
+               case MIPS_MTLO:         /* 13 */
+                   if (mask & jit_class_gpr) {
+                       if (jit_mips6_p()) {
+                           assert(i.ic.b == 1);
+                           /* release 6 count instructions also
+                            * encode a source register in rs */
+                           regs[1] = i.rs.b;
+                       }
+                       else {
+                           assert(!i.rs.b && !i.rt.b);
+                           regs[1] = 0;
+                       }
+                       regs[0] = i.rd.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               case MIPS_JR:           /* 08 */
+                   assert(!jit_mips6_p());
+                   /* fallthrough */
+               case MIPS_JALR:         /* 09 */
+                   /* check for proper/known encondig */
+                   assert(!i.ic.b);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               case MIPS_SLL:          /* 00 */
+               case MIPS_SRL:          /* 02 */
+               case MIPS_SRA:          /* 03 */
+               case MIPS_DSLL:         /* 38 */
+               case MIPS_DSRL:         /* 3a */
+               case MIPS_DSRA:         /* 3b */
+               case MIPS_DSLL32:       /* 3c */
+               case MIPS_DSRA32:       /* 3f */
+               case MIPS_DSRL32:       /* 3e */
+                   /* shift (or rotate if i.rs.b == 1) */
+                   assert(i.rs.b == 0 || i.rs.b == 1);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rt.b;
+                       regs[1] = i.rd.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               case MIPS_SYNC:         /* 0f */
+                   assert(i.rs.b == 0 && i.rt.b == 0 && i.rd.b == 0);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   break;
+               case MIPS_MOVZ:         /* 0a */
+               case MIPS_MOVN:         /* 0b */
+                   assert(!jit_mips6_p() && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               /* SELEQZ */
+               case 53:                /* 35 */
+               /* SELNEZ */
+               case 55:                /* 37 */
+                   assert(jit_mips6_p() && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+       case MIPS_REGIMM:               /* 01 */
+           switch (i.rt.b) {
+               case MIPS_BLTZ:         /* 00 */
+               case MIPS_BGEZ:         /* 01 */
+               case MIPS_BGEZAL:       /* 11 */
+                   break;
+               default:
+                   abort();
+           }
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = regs[2] = 0;
+           }
+           break;
+       case MIPS_J:                    /* 02 */
+       case MIPS_JAL:                  /* 03 */
+           if (mask & jit_class_gpr)
+               regs[0] = regs[1] = regs[2] = 0;
+           break;
+       case MIPS_LUI:                  /* 0f */
+           assert(i.rs.b == 0);
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rt.b;
+               regs[1] = regs[2] = 0;
+           }
+           break;
+       case MIPS_SPECIAL2:             /* 1c */
+           switch (i.tc.b) {
+               case MIPS_CLZ:          /* 20 */
+               case MIPS_CLO:          /* 21 */
+               case MIPS_DCLZ:         /* 24 */
+               case MIPS_DCLO:         /* 25 */
+                   assert(!jit_mips6_p() && i.rt.b == i.rd.b && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rd.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               case MIPS_MUL:          /* 02 */
+                   assert(jit_mips2_p() && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+       case MIPS_SPECIAL3:             /* 1f */
+           switch (i.tc.b) {
+               case MIPS_EXT:          /* 00 */
+               case MIPS_DEXTM:        /* 01 */
+               case MIPS_DEXTU:        /* 02 */
+               case MIPS_DEXT:         /* 03 */
+               case MIPS_INS:          /* 04 */
+               case MIPS_DINSM:        /* 05 */
+               case MIPS_DINSU:        /* 06 */
+               case MIPS_DINS:         /* 07 */
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               /* BITSWAP */
+               case MIPS_BSHFL:        /* 20 */
+               /* DBITSWAP */
+               case MIPS_DBSHFL:       /* 24 */
+                   switch (i.ic.b) {
+                       case MIPS_WSBH: /* 02 */
+                       case MIPS_SEB:  /* 10 */
+                       case MIPS_SEH:  /* 18 */
+                           if (mask & jit_class_gpr) {
+                               regs[0] = i.rt.b;
+                               regs[1] = i.rd.b;
+                               regs[2] = 0;
+                           }
+                           break;
+                       /* BITSWAP DBITSWAP */
+                       case 0:
+                           assert(jit_mips6_p() && i.rt.b == 0);
+                           if (mask & jit_class_gpr) {
+                               regs[0] = i.rs.b;
+                               regs[1] = i.rd.b;
+                               regs[2] = 0;
+                           }
+                           break;
+                       default:
+                           abort();
+                   }
+                   break;
+               /* SC */
+               case 38:                /* 26 */
+               /* SCD */
+               case 39:                /* 27 */
+               /* LD */
+               case 54:                /* 36 */
+               /* LLD */
+               case 55:                /* 37 */
+                   assert(jit_mips6_p());
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+       case MIPS_COP1:                 /* 11 */
+           switch (i.tc.b) {
+               case MIPS_ADD_fmt:      /* 00 */
+                   switch (i.rs.b) {
+                       case MIPS_MF:   /* 00 */
+                       case MIPS_DMF:  /* 01 */
+                       case MIPS_MFH:  /* 03 */
+                       case MIPS_MT:   /* 04 */
+                       case MIPS_DMT:  /* 05 */
+                       case MIPS_MTH:  /* 07 */
+                           assert(i.ic.b == 0);
+                           if (mask & jit_class_gpr) {
+                               regs[0] = i.rt.b;
+                               regs[1] = regs[2] = 0;
+                           }
+                           else
+                               regs[0] = i.rd.b;
+                           break;
+                       default:
+                           goto three_fprs;
+                   }
+                   break;
+               case MIPS_SUB_fmt:      /* 01 */
+               case MIPS_MUL_fmt:      /* 02 */
+               case MIPS_DIV_fmt:      /* 03 */
+               three_fprs:
+                                       /* 10 */
+                   assert(i.rs.b == MIPS_fmt_S ||
+                                       /* 11 */
+                          i.rs.b == MIPS_fmt_D);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.rt.b;
+                       regs[1] = i.rd.b;
+                       regs[2] = i.ic.b;
+                   }
+                   break;
+               case MIPS_SQRT_fmt:     /* 04 */
+               case MIPS_ABS_fmt:      /* 05 */
+               case MIPS_MOV_fmt:      /* 06 */
+               case MIPS_NEG_fmt:      /* 07 */
+                   assert((i.rs.b == MIPS_fmt_S || i.rs.b == MIPS_fmt_D) &&
+                          i.rt.b == 0);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.rd.b;
+                       regs[1] = i.ic.b;
+                   }
+                   break;
+               case MIPS_CVT_fmt_S:    /* 20 */
+               case MIPS_CVT_fmt_D:    /* 21 */
+               case MIPS_CVT_fmt_W:    /* 24 */
+               case MIPS_CVT_fmt_L:    /* 25 */
+                   switch (i.rs.b) {
+                       case MIPS_fmt_S:/* 10 */
+                       case MIPS_fmt_D:/* 11 */
+                       case MIPS_fmt_W:/* 14 */
+                       case MIPS_fmt_L:/* 15 */
+                           break;
+                       default:
+                           abort();
+                   }
+                   assert(i.rt.b == 0);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.rd.b;
+                       regs[1] = i.ic.b;
+                   }
+                   break;
+               case MIPS_cond_F:       /* 30 */
+               case MIPS_cond_UN:      /* 31 */
+               case MIPS_cond_EQ:      /* 32 */
+               case MIPS_cond_UEQ:     /* 33 */
+               case MIPS_cond_OLT:     /* 34 */
+               case MIPS_cond_ULT:     /* 35 */
+               case MIPS_cond_OLE:     /* 36 */
+               case MIPS_cond_ULE:     /* 37 */
+               case MIPS_cond_SF:      /* 38 */
+               case MIPS_cond_NGLE:    /* 39 */
+               case MIPS_cond_SEQ:     /* 3a */
+               case MIPS_cond_NGL:     /* 3b */
+               case MIPS_cond_LT:      /* 3c */
+               case MIPS_cond_NGE:     /* 3d */
+               case MIPS_cond_LE:      /* 3e */
+               case MIPS_cond_UGT:     /* 3f */
+                   assert(!jit_mips6_p() &&
+                                       /* 10 */
+                          (i.fm.b == MIPS_fmt_S ||
+                                       /* 11 */
+                           i.fm.b == MIPS_fmt_D));
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.ft.b;
+                       regs[1] = i.fs.b;
+                   }
+                   break;
+               default:
+                   switch (i.ic.b) {
+                       case MIPS_cmp_AF:  /* 00 */
+                       case MIPS_cmp_UN:  /* 01 */
+                       case MIPS_cmp_EQ:  /* 02 */
+                       case MIPS_cmp_UEQ: /* 03 */
+                       case MIPS_cmp_LT:  /* 04 */
+                       case MIPS_cmp_ULT: /* 05 */
+                       case MIPS_cmp_LE:  /* 06 */
+                       case MIPS_cmp_ULE: /* 07 */
+                       case MIPS_cmp_SAF: /* 08 */
+                       case MIPS_cmp_SUN: /* 09 */
+                       case MIPS_cmp_SEQ: /* 0a */
+                       case MIPS_cmp_SUEQ:/* 0b */
+                       case MIPS_cmp_SLT: /* 0c */
+                       case MIPS_cmp_SULT:/* 0d */
+                       case MIPS_cmp_SLE: /* 0e */
+                       case MIPS_cmp_SULE:/* 0f */
+                           assert(jit_mips6_p() &&
+                                          /* 14 */
+                                  (i.rs.b == MIPS_condn_S ||
+                                          /* 15 */
+                                   i.rs.b == MIPS_condn_D));
+                           if (mask & jit_class_gpr)
+                               regs[0] = regs[1] = regs[2] = 0;
+                           else {
+                               regs[0] = i.ft.b;
+                               regs[1] = i.fs.b;
+                               regs[2] = i.fd.b;
+                           }
+                           goto done;
+                       default:
+                           break;
+                   }
+                   switch (i.rt.b) {
+                       case MIPS_BC:   /* 08 */
+                           assert(!jit_mips6_p() &&
+                                       /* 00 */
+                                  (i.rs.b == MIPS_BCF ||
+                                       /* 01 */
+                                   i.rs.b == MIPS_BCT));
+                           if (mask & jit_class_gpr)
+                               regs[0] = regs[1] = regs[2] = 0;
+                           else {
+                               regs[0] = i.rt.b;
+                               regs[1] = i.rd.b;
+                           }
+                           break;
+                       case MIPS_BC1EQZ:/* 09 */
+                       case MIPS_BC1NEZ:/* 0a */
+                           assert(jit_mips6_p());
+                           if (mask & jit_class_gpr)
+                               regs[0] = regs[1] = regs[2] = 0;
+                           else
+                               regs[0] = i.rt.b;
+                           break;
+                       default:
+                           abort();
+                   }
+                   break;
+           }
+           break;
+       case MIPS_ADDIU:                /* 09 */
+       case MIPS_SLTI:                 /* 0a */
+       case MIPS_SLTIU:                /* 0b */
+       case MIPS_ANDI:                 /* 0c */
+       case MIPS_ORI:                  /* 0d */
+       case MIPS_XORI:                 /* 0e */
+       case MIPS_DADDIU:               /* 18 */
+       case MIPS_LB:                   /* 20 */
+       case MIPS_LH:                   /* 21 */
+       case MIPS_LW:                   /* 23 */
+       case MIPS_LBU:                  /* 24 */
+       case MIPS_LHU:                  /* 25 */
+       case MIPS_LWU:                  /* 27 */
+       case MIPS_SB:                   /* 28 */
+       case MIPS_SH:                   /* 29 */
+       case MIPS_SW:                   /* 2b */
+       case MIPS_LD:                   /* 37 */
+       case MIPS_SD:                   /* 3f */
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = i.rt.b;
+               regs[2] = 0;
+           }
+           break;
+       case MIPS_LL:                   /* 30 */
+       case MIPS_LLD:                  /* 34 */
+       case MIPS_SC:                   /* 38 */
+       case MIPS_SCD:                  /* 3c */
+           assert(!jit_mips6_p() && i.ic.b == 0);
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = i.rt.b;
+               regs[2] = 0;
+           }
+           break;
+       case MIPS_BLEZ:                 /* 06 */
+       case MIPS_BGTZ:                 /* 07 */
+           assert(i.rt.b == 0);
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = regs[2] = 0;
+           }
+           break;
+       case MIPS_BEQ:                  /* 04 */
+       case MIPS_BNE:                  /* 05 */
+           /* NOTE(review): rt is recorded below as a register operand,
+            * so requiring it to be zero here looks questionable;
+            * confirm against the branch emitters. */
+           assert(i.rt.b == 0);
+           /* fallthrough */
+       case MIPS_LWC1:                 /* 31 */
+       case MIPS_LDC1:                 /* 35 */
+       case MIPS_SWC1:                 /* 39 */
+       case MIPS_SDC1:                 /* 3d */
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = i.rt.b;
+               regs[2] = 0;
+           }
+           else
+               regs[0] = i.rt.b;
+           break;
+       default:
+           abort();
+    }
+done:
+    /* If cannot move instruction do delay slot */
+    if (_jitc->inst.pend &&
+       ((((mask & jit_class_fpr) || reg0) &&
+         (reg0 == regs[0] || reg0 == regs[1] || reg0 == regs[2])) ||
+        (((mask & jit_class_fpr) || reg1) &&
+         (reg1 == regs[0] || reg1 == regs[1] || reg1 == regs[2])))) {
+       flush();
+    }
+    /* Get a temporary register */
+retry:
+    reg = jit_get_reg(mask|jit_class_nospill);
+    /* Make sure will not use a register in use by delay slot */
+    if (_jitc->inst.pend) {
+       if (rn(reg) == regs[0] ||
+           rn(reg) == regs[1] || rn(reg) == regs[2]) {
+           /* keep the rejected register allocated while asking for
+            * another one, so it is not handed out again */
+           r0 = reg;
+           reg = jit_get_reg(mask|jit_class_nospill);
+           if (rn(reg) == regs[0] ||
+               rn(reg) == regs[1] || rn(reg) == regs[2]) {
+               r1 = reg;
+               reg = jit_get_reg(mask|jit_class_nospill);
+               if (rn(reg) == regs[0] ||
+                   rn(reg) == regs[1] || rn(reg) == regs[2]) {
+                   r2 = reg;
+                   reg = jit_get_reg(mask|jit_class_nospill);
+                   jit_unget_reg(r2);
+               }
+               jit_unget_reg(r1);
+           }
+           jit_unget_reg(r0);
+       }
+    }
+    if (reg == JIT_NOREG) {
+       /* Cannot get a register to optimize delay slot */
+       flush();
+       /* Must find a free register */
+       if (!(mask & jit_class_chk))
+           goto retry;
+    }
+    assert(reg != JIT_NOREG || (mask & jit_class_chk));
+    return (reg);
+}
+
 static void
 _hrrrit(jit_state_t *_jit,jit_int32_t hc,
        jit_int32_t rs, jit_int32_t rt, jit_int32_t rd,
@@ -789,7 +1430,7 @@ _hrrrit(jit_state_t *_jit,jit_int32_t hc,
     i.rt.b = rt;
     i.rs.b = rs;
     i.hc.b = hc;
-    ii(i.op);
+    instr(i.op);
 }
 
 static void
@@ -802,7 +1443,21 @@ _hrri(jit_state_t *_jit, jit_int32_t hc,
     i.rt.b = rt;
     i.rs.b = rs;
     i.hc.b = hc;
-    ii(i.op);
+    instr(i.op);
+}
+
+/*
+ * Build an instruction word from an "hc" field, two register fields
+ * (rs, rt), a value stored in the i9 field and a "tc" field, then hand
+ * it to instr().
+ */
+static void
+_hrri9(jit_state_t *_jit, jit_int32_t hc,
+      jit_int32_t rs, jit_int32_t rt, jit_int32_t i9, jit_int32_t tc)
+{
+    jit_instr_t                i;
+    /* start from an all-zero word so fields not set below are zero */
+    i.op = 0;
+    i.tc.b = tc;
+    i.i9.b = i9;
+    i.rt.b = rt;
+    i.rs.b = rs;
+    i.hc.b = hc;
+    instr(i.op);
 }
 
 static void
@@ -811,7 +1466,7 @@ _hi(jit_state_t *_jit, jit_int32_t hc, jit_int32_t im)
     jit_instr_t                i;
     i.ii.b = im;
     i.hc.b = hc;
-    ii(i.op);
+    instr(i.op);
 }
 
 static void
@@ -854,6 +1509,121 @@ _insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
         DINS(r0, r1, pos, size);
 }
 
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2 
+unsigned int mask = ~0;         
+while ((s >>= 1) > 0) 
+{
+  mask ^= (mask << s);
+  v = ((v >> s) & mask) | ((v << s) & ~mask);
+}
+*/
+/*
+ * Emit code implementing the loop quoted in the comment above: "v" is
+ * loaded from r1 and transformed in place, starting with s = __WORDSIZE
+ * and mask = ~0 and halving s on each pass until it is <= 0.
+ * Four temporary gpr registers are taken for s, mask and the two
+ * partial results and released before returning.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1)
+{
+    /* register handles returned by jit_get_reg() */
+    jit_int32_t                s, mask, t0, t1;
+    /* code addresses used for the backward jump and the exit patch */
+    jit_word_t         loop, done;
+    movr(v, r1);
+    s = jit_get_reg(jit_class_gpr);
+    movi(rn(s), __WORDSIZE);                   /* s = sizeof(v) * CHAR_BIT; */
+    mask = jit_get_reg(jit_class_gpr);
+    movi(rn(mask), ~0L);                       /* mask = ~0; */
+    /* nothing may stay pending across the loop head address */
+    flush();
+    loop = _jit->pc.w;                         /* while ((s >>= 1) > 0) */
+    rshi(rn(s), rn(s), 1);                     /*        (s >>= 1) */
+    done = blei(_jit->pc.w, rn(s), 0);         /* no loop if s <= 0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    lshr(rn(t0), rn(mask), rn(s));             /* t0 = (mask << s) */
+    xorr(rn(mask), rn(mask), rn(t0));          /* mask ^= t0 */
+    rshr(rn(t0), v, rn(s));                    /* t0 = v >> s */
+    andr(rn(t0), rn(t0), rn(mask));            /* t0 = t0 & mask */
+    t1 = jit_get_reg(jit_class_gpr);
+    lshr(rn(t1), v, rn(s));                    /* t1 = v << s */
+    comr(v, rn(mask));                         /* v = ~mask */
+    andr(rn(t1), v, rn(t1));                   /* t1 = t1 & v */
+    orr(v, rn(t0), rn(t1));                    /* v = t0 | t1 */
+    jmpi(loop, 0);
+    /* the exit target must be the address after everything emitted */
+    flush();
+    patch_at(done, _jit->pc.w);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    jit_unget_reg(mask);
+    jit_unget_reg(s);
+}
+
+/*
+ * Emit the count-leading-ones instruction for r1 into r0, choosing the
+ * 32 or 64 bit form from __WORDSIZE and the release 6 encoding when
+ * jit_mips6_p() is true.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips6_p()) {
+#if __WORDSIZE == 32
+       CLO_R6(r0, r1);
+#else
+       DCLO_R6(r0, r1);
+#endif
+    }
+    else {
+#if __WORDSIZE == 32
+       CLO(r0, r1);
+#else
+       DCLO(r0, r1);
+#endif
+    }
+}
+
+/*
+ * Emit the count-leading-zeros instruction for r1 into r0, choosing
+ * the 32 or 64 bit form from __WORDSIZE and the release 6 encoding
+ * when jit_mips6_p() is true.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips6_p()) {
+#if __WORDSIZE == 32
+       CLZ_R6(r0, r1);
+#else
+       DCLZ_R6(r0, r1);
+#endif
+    }
+    else {
+#if __WORDSIZE == 32
+       CLZ(r0, r1);
+#else
+       DCLZ(r0, r1);
+#endif
+    }
+}
+
+/*
+ * Transform r1 into r0 and then count leading ones of the result.
+ * Without release 6 the transform is the generic bitswap() sequence;
+ * with release 6 it is BITSWAP (DBITSWAP on 64 bit) followed by a
+ * byte swap of the word.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_mips6_p()) {
+       bitswap(r0, r1);
+       clor(r0, r0);
+       return;
+    }
+#if __WORDSIZE == 32
+    BITSWAP(r0, r1);
+    bswapr_ui(r0, r0);
+    CLO_R6(r0, r0);
+#else
+    DBITSWAP(r0, r1);
+    bswapr_ul(r0, r0);
+    DCLO_R6(r0, r0);
+#endif
+}
+
+/*
+ * Transform r1 into r0 and then count leading zeros of the result.
+ * Without release 6 the transform is the generic bitswap() sequence;
+ * with release 6 it is BITSWAP (DBITSWAP on 64 bit) followed by a
+ * byte swap of the word.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_mips6_p()) {
+       bitswap(r0, r1);
+       clzr(r0, r0);
+       return;
+    }
+#if __WORDSIZE == 32
+    BITSWAP(r0, r1);
+    bswapr_ui(r0, r0);
+    CLZ_R6(r0, r0);
+#else
+    DBITSWAP(r0, r1);
+    bswapr_ul(r0, r0);
+    DCLZ_R6(r0, r0);
+#endif
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -1048,11 +1818,15 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    if (jit_mips2_p() && __WORDSIZE == 32)
-       MUL(r0, r1, r2);
+    /* r0 = r1 * r2: release 6 uses mul_r6; otherwise MUL when
+     * jit_mips2_p() and 32 bit words, else multu followed by MFLO */
+    if (jit_mips6_p())
+       mul_r6(r0, r1, r2);
     else {
-        multu(r1, r2);
-        MFLO(r0);
+       if (jit_mips2_p() && __WORDSIZE == 32)
+           MUL(r0, r1, r2);
+       else {
+           multu(r1, r2);
+           MFLO(r0);
+       }
     }
 }
 
@@ -1071,12 +1845,38 @@ static void
 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
 {
-    if (sign)
-       mult(r2, r3);
-    else
-       multu(r2, r3);
-    MFLO(r0);
-    MFHI(r1);
+    jit_int32_t                t0;
+    if (jit_mips6_p()) {
+       if (r0 == r2 || r0 == r3) {
+           t0 = jit_get_reg(jit_class_gpr);
+           if (sign)
+               mul_r6(rn(t0), r2, r3);
+           else
+               mulu_r6(rn(t0), r2, r3);
+       }
+       else {
+           if (sign)
+               mul_r6(r0, r2, r3);
+           else
+               mulu_r6(r0, r2, r3);
+       }
+       if (sign)
+           muh_r6(r1, r2, r3);
+       else
+           muhu_r6(r1, r2, r3);
+       if (r0 == r2 || r0 == r3) {
+           movr(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+    }
+    else {
+       if (sign)
+           mult(r2, r3);
+       else
+           multu(r2, r3);
+       MFLO(r0);
+       MFHI(r1);
+    }
 }
 
 static void
@@ -1093,8 +1893,12 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    div(r1, r2);
-    MFLO(r0);
+    /* release 6 writes the result straight to r0 with div_r6;
+     * otherwise div is followed by MFLO to fetch it */
+    if (jit_mips6_p())
+       div_r6(r0, r1, r2);
+    else {
+       div(r1, r2);
+       MFLO(r0);
+    }
 }
 
 static void
@@ -1110,8 +1914,12 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divu(r1, r2);
-    MFLO(r0);
+    /* release 6 writes the result straight to r0 with divu_r6;
+     * otherwise divu is followed by MFLO to fetch it */
+    if (jit_mips6_p())
+       divu_r6(r0, r1, r2);
+    else {
+       divu(r1, r2);
+       MFLO(r0);
+    }
 }
 
 static void
@@ -1128,12 +1936,39 @@ static void
 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
 {
-    if (sign)
-       div(r2, r3);
-    else
-       divu(r2, r3);
-    MFLO(r0);
-    MFHI(r1);
+    jit_int32_t                t0;
+    if (jit_mips6_p()) {
+       if (r0 == r2 || r0 == r3)
+           t0 = jit_get_reg(jit_class_gpr);
+       else
+           t0 = _NOREG;
+       if (sign) {
+           if (t0 == _NOREG)
+               div_r6(r0, r2, r3);
+           else
+               div_r6(rn(t0), r2, r3);
+           mod_r6(r1, r2, r3);
+       }
+       else {
+           if (t0 == _NOREG)
+               divu_r6(r0, r2, r3);
+           else
+               divu_r6(rn(t0), r2, r3);
+           modu_r6(r1, r2, r3);
+       }
+       if (t0 != _NOREG) {
+           movr(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+    }
+    else {
+       if (sign)
+           div(r2, r3);
+       else
+           divu(r2, r3);
+       MFLO(r0);
+       MFHI(r1);
+    }
 }
 
 static void
@@ -1150,8 +1985,12 @@ _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    div(r1, r2);
-    MFHI(r0);
+    /* release 6 writes the result straight to r0 with mod_r6;
+     * otherwise div is followed by MFHI to fetch it */
+    if (jit_mips6_p())
+       mod_r6(r0, r1, r2);
+    else {
+       div(r1, r2);
+       MFHI(r0);
+    }
 }
 
 static void
@@ -1167,8 +2006,12 @@ _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divu(r1, r2);
-    MFHI(r0);
+    /* release 6 writes the result straight to r0 with modu_r6;
+     * otherwise divu is followed by MFHI to fetch it */
+    if (jit_mips6_p())
+       modu_r6(r0, r1, r2);
+    else {
+       divu(r1, r2);
+       MFHI(r0);
+    }
 }
 
 static void
@@ -1322,7 +2165,7 @@ static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         w;
-
+    flush();
     w = _jit->pc.w;
 #  if __WORDSIZE == 32
     LUI(r0, i0 >> 16);
@@ -1339,6 +2182,36 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
+/*
+ * Conditional move of r1 into r0 controlled by r2.  Without release 6
+ * a single MOVN is emitted.  With release 6 the sequence is SELNEZ
+ * into a temporary, SELEQZ on r0 itself, then OR of both into r0; the
+ * temporary is taken before and released after the sequence.
+ */
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (!jit_mips6_p()) {
+       MOVN(r0, r1, r2);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    /* r1 is read before r0 is overwritten */
+    SELNEZ(rn(tmp), r1, r2);
+    SELEQZ(r0, r0, r2);
+    OR(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Conditional move of r1 into r0 controlled by r2.  Without release 6
+ * a single MOVZ is emitted.  With release 6 the sequence is SELEQZ
+ * into a temporary, SELNEZ on r0 itself, then OR of both into r0; the
+ * temporary is taken before and released after the sequence.
+ */
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp;
+    if (!jit_mips6_p()) {
+       MOVZ(r0, r1, r2);
+       return;
+    }
+    tmp = jit_get_reg(jit_class_gpr);
+    /* r1 is read before r0 is overwritten */
+    SELEQZ(rn(tmp), r1, r2);
+    SELNEZ(r0, r0, r2);
+    OR(r0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
@@ -1352,27 +2225,37 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     }
     SYNC();
     /* retry: */
+    flush();
     retry = _jit->pc.w;
 #  if __WORDSIZE == 32
-    LL(r0, 0, r1);
+    if (jit_mips6_p()) LL_R6(r0, 0, r1);
+    else               LL(r0, 0, r1);
 #  else
-    LLD(r0, 0, r1);
+    if (jit_mips6_p()) LLD_R6(r0, 0, r1);
+    else               LLD(r0, 0, r1);
 #  endif
+    flush();
     jump0 = _jit->pc.w;
     BNE(r0, r2, 1);                            /* bne done r0 r2 */
     movi(r0, 0);                               /* set to 0 in delay slot */
+    flush();
     movr(r0, r3);                              /* after jump and delay slot */
     /* store new value */
 #  if __WORDSIZE == 32
-    SC(r0, 0, r1);
+    if (jit_mips6_p()) SC_R6(r0, 0, r1);
+    else               SC(r0, 0, r1);
 #  else
-    SCD(r0, 0, r1);
+    if (jit_mips6_p()) SCD_R6(r0, 0, r1);
+    else               SCD(r0, 0, r1);
 #  endif
+    flush();
     jump1 = _jit->pc.w;
     BEQ(r0, _ZERO_REGNO, 0);                   /* beqi retry r0 0 */
     movi(r0, 1);                               /* set to 1 in delay slot */
+    flush();
     SYNC();
     /* done: */
+    flush();
     done = _jit->pc.w;
     patch_at(jump0, done);
     patch_at(jump1, retry);
@@ -1483,120 +2366,90 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_c(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_c(r0, r0);
 }
 
 static void
 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LB(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_c(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_c(r0, r0);
     }
 }
 
 static void
 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_uc(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_uc(r0, r0);
 }
 
 static void
 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LBU(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_uc(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_uc(r0, r0);
     }
 }
 
 static void
 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_s(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_s(r0, r0);
 }
 
 static void
 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LH(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_s(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_s(r0, r0);
     }
 }
 
 static void
 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_us(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_us(r0, r0);
 }
 
 static void
 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LHU(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_us(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_us(r0, r0);
     }
 }
 
 static void
 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_i(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_i(r0, r0);
 }
 
 static void
 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LW(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_i(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_i(r0, r0);
     }
 }
 
@@ -1604,48 +2457,36 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_ui(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_ui(r0, r0);
 }
 
 static void
 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LWU(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_ui(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_ui(r0, r0);
     }
 }
 
 static void
 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_l(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_l(r0, r0);
 }
 
 static void
 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LD(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_l(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_l(r0, r0);
     }
 }
 #endif
@@ -1948,8 +2789,7 @@ static void
 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     subr(r0, r1, r2);
-    SLTU(r0, _ZERO_REGNO, r0);
-    XORI(r0, r0, 1);
+    SLTIU(r0, r0, 1);
 }
 
 static void
@@ -1957,11 +2797,10 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     if (i0) {
        subi(r0, r1, i0);
-       SLTU(r0, _ZERO_REGNO, r0);
+       SLTIU(r0, r0, 1);
+    } else {
+       SLTIU(r0, r1, 1);
     }
-    else
-       SLTU(r0, _ZERO_REGNO, r1);
-    XORI(r0, r0, 1);
 }
 
 static void
@@ -2059,173 +2898,19 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 
 static jit_word_t
-_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr);
-    SLT(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_word_t         d;
-    jit_int32_t                reg;
-    jit_bool_t         zero_p;
-
-    if (!(zero_p = i1 == 0))
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       if (!zero_p)
-           SLTI(rn(reg), r0, i1);
-       w = _jit->pc.w;
-       d = ((i0 - w) >> 2) - 1;
-       if (!zero_p)
-           BNE(rn(reg), _ZERO_REGNO, d);
-       else
-           BLTZ(r0, d);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bltr(i0, r0, rn(reg));
-    }
-    if (!zero_p)
-       jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       SLTIU(rn(reg), r0, i1);
-       w = _jit->pc.w;
-       BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bltr_u(i0, r0, rn(reg));
-    }
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r1, r0);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r1, r0);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BLEZ(r0, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bler(i0, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-
-    return (w);
-}
-
-static jit_word_t
-_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bler_u(i0, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-
-    return (w);
-}
-
-static jit_word_t
-_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-
+    jit_int32_t                op, reg;
+    /* Only to avoid incorrectly moving an instruction into the delay slot */
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1);
+    op = pending();
+    /* implicit flush() */
     w = _jit->pc.w;
     BEQ(r0, r1, ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
+    if (reg != JIT_NOREG)
+       jit_unget_reg(reg);
     return (w);
 }
 
@@ -2233,179 +2918,168 @@ static jit_word_t
 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
+    jit_int32_t                op, reg;
+    if (i1 == 0)
+       w = beqr(i0, r0, _ZERO_REGNO);
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+       op = pending();
        movi(rn(reg), i1);
-       w = beqr(i0, r0, rn(reg));
+       flush();
+       w = _jit->pc.w;
+       BEQ(r0, rn(reg), ((i0 - w) >> 2) - 1);
+       delay(op);
        jit_unget_reg(reg);
     }
-
     return (w);
 }
 
 static jit_word_t
-_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+      jit_bool_t sltu)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r0, r1);
+    jit_int32_t                op, reg;
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
+    if (sltu)
+       SLTU(rn(reg), r0, r1);
+    else
+       SLT(rn(reg), r0, r1);
+    flush();
     w = _jit->pc.w;
     BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
+    delay(op);
     jit_unget_reg(reg);
-
     return (w);
 }
 
 static jit_word_t
-_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+      jit_bool_t sltiu, jit_bool_t bne)
 {
     jit_word_t         w;
     jit_word_t         d;
-    jit_int32_t                reg;
     jit_bool_t         zero_p;
-
-    if (!(zero_p = i1 == 0))
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                op, t0, mask;
+    zero_p = !sltiu && i1 == 0;
+    /* Even if zero_p, allocate one as a means to avoid an incorrect delay slot */
+    mask = jit_class_gpr;
+    if (zero_p)
+       mask |= jit_class_chk;
+    t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO);
     if (can_sign_extend_short_p(i1)) {
-       if (!zero_p)
-           SLTI(rn(reg), r0, i1);
+       op = pending();
+       if (!zero_p) {
+           if (sltiu)
+               SLTIU(rn(t0), r0, i1);
+           else
+               SLTI(rn(t0), r0, i1);
+        }
+       flush();
        w = _jit->pc.w;
        d = ((i0 - w) >> 2) - 1;
-       if (!zero_p)
-           BEQ(rn(reg), _ZERO_REGNO, d);
-       else
-           BGEZ(r0, d);
-       NOP(1);
+       if (bne) {
+           if (!zero_p)
+               BNE(rn(t0), _ZERO_REGNO, d);
+           else
+               BLTZ(r0, d);
+       }
+       else {
+           if (!zero_p)
+               BEQ(rn(t0), _ZERO_REGNO, d);
+           else
+               BGEZ(r0, d);
+       }
     }
     else {
-       movi(rn(reg), i1);
-       w = bger(i0, r0, rn(reg));
-    }
-    if (!zero_p)
-       jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       SLTIU(rn(reg), r0, i1);
+       op = pending();
+       movi(rn(t0), i1);
+       if (sltiu)
+           SLTU(rn(t0), r0, rn(t0));
+        else
+           SLT(rn(t0), r0, rn(t0));
+       flush();
        w = _jit->pc.w;
-       BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bger_u(i0, r0, rn(reg));
+       if (bne)
+           BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       else
+           BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     }
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r1, r0);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (w);
 }
 
 static jit_word_t
-_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+      jit_bool_t sltu, jit_bool_t inv)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r1, r0);
+    jit_int32_t                op, reg;
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
+    if (sltu)
+       SLTU(rn(reg), r1, r0);
+    else
+       SLT(rn(reg), r1, r0);
+    flush();
     w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
+    if (inv)
+       BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    else
+       BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    delay(op);
     jit_unget_reg(reg);
-
     return (w);
 }
 
 static jit_word_t
-_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+      jit_bool_t sltiu, jit_bool_t inv)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
+    jit_int32_t                op, t0, mask;
+    mask = jit_class_gpr;
+    if (i0 == 0)
+       mask |= jit_class_chk;
+    /* Allocate even if i0 == 0 as a way to avoid incorrect delay slot */
+    t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO);
     if (i1 == 0) {
+       op = pending();
+       /* implicit flush() */
        w = _jit->pc.w;
-       BGTZ(r0, ((i0 - w) >> 2) - 1);
-       NOP(1);
+       if (inv) {
+           if (sltiu)
+               BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+           else
+               BLEZ(r0, ((i0 - w) >> 2) - 1);
+       }
+       else {
+           if (sltiu)
+               BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+           else
+               BGTZ(r0, ((i0 - w) >> 2) - 1);
+       }
     }
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bgtr(i0, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-
-    return (w);
-}
-
-static jit_word_t
-_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
+       op = pending();
+       movi(rn(t0), i1);
+       if (sltiu)
+           SLTU(rn(t0), rn(t0), r0);
+       else
+           SLT(rn(t0), rn(t0), r0);
+       flush();
        w = _jit->pc.w;
-       BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bgtr_u(i0, r0, rn(reg));
-       jit_unget_reg(reg);
+       if (inv)
+           BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       else
+           BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     }
-
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (w);
 }
 
@@ -2413,11 +3087,16 @@ static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-
+    jit_int32_t                op, reg;
+    /* Just to not move incorrectly instruction to delay slot */
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1);
+    op = pending();
+    /* implicit flush() */
     w = _jit->pc.w;
     BNE(r0, r1, ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
+    if (reg != JIT_NOREG)
+       jit_unget_reg(reg);
     return (w);
 }
 
@@ -2425,48 +3104,85 @@ static jit_word_t
 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
+    jit_int32_t                op, reg;
+    if (i1 == 0)
+       w = bner(i0, r0, _ZERO_REGNO);
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+       op = pending();
        movi(rn(reg), i1);
-       w = bner(i0, r0, rn(reg));
+       flush();
+       w = _jit->pc.w;
+       BNE(r0, rn(reg), ((i0 - w) >> 2) - 1);
+       delay(op);
        jit_unget_reg(reg);
     }
-
     return (w);
 }
 
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
+    jit_int32_t                op, t0;
+    /* make sure delay slot does not use r0 */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                   r0, _ZERO_REGNO);
+    op = pending();
     JR(r0);
-    NOP(1);
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
 }
 
 static jit_word_t
-_jmpi(jit_state_t *_jit, jit_word_t i0)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch)
+{
+    jit_int32_t                op, t0;
+    jit_word_t         w, disp;
+    /* try to get a pending instruction before the jump */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, _ZERO_REGNO, _ZERO_REGNO);
+    op = pending();
+    /* implicit flush() */
     w = _jit->pc.w;
-    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
-       J((i0 & ~0xf0000000) >> 2);
-       NOP(1);
+    if (jit_mips2_p()) {
+       disp = ((i0 - w) >> 2) - 1;
+       if (patch || can_sign_extend_short_p(disp)) {
+           BEQ(_ZERO_REGNO, _ZERO_REGNO, disp);
+           goto done;
+       }
     }
+    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000))
+       J((i0 & ~0xf0000000) >> 2);
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi_p(rn(reg), i0);
-       jmpr(rn(reg));
-       jit_unget_reg(reg);
+       if (patch)
+           w = movi_p(rn(t0), i0);
+       else
+           movi(rn(t0), i0);
+       JR(rn(t0));
     }
+done:
+    delay(op);
+    jit_unget_reg(t0);
+    return (w);
+}
 
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_int32_t                op, t0;
+    /* keep _T9_REGNO out of the delay slot; t0 is always used, so no jit_class_chk */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr,
+                                   _T9_REGNO, _ZERO_REGNO);
+    op = pending();
+    /* implicit flush() */
+    w = _jit->pc.w;
+    movi_p(rn(t0), i0);
+    flush();                   /* movi_p will be patched */
+    JR(rn(t0));
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (w);
 }
 
@@ -2486,11 +3202,14 @@ _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (r0 == 0) t1 = t2 */
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     addr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2514,11 +3233,14 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, i1);
        SLT(rn(t2), r0, rn(t1));
        SLT(rn(t1), rn(t1), r0);
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2543,10 +3265,13 @@ _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     addr(rn(t0), r0, r1);
     SLTU(rn(t1), rn(t0), r0);
+    flush();
+    /* cannot optimize delay slot */
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2564,10 +3289,13 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, i1);
        SLTU(rn(t1), rn(t0), r0);
+       flush();
+       /* cannot optimize delay slot */
        w = _jit->pc.w;
        BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2596,11 +3324,14 @@ _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (r0 == 0) t1 = t2 */
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     addr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2624,11 +3355,14 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, i1);
        SLT(rn(t2), r0, rn(t1));
        SLT(rn(t1), rn(t1), r0);
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2653,10 +3387,13 @@ _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     addr(rn(t0), r0, r1);
     SLTU(rn(t1), rn(t0), r0);
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2674,10 +3411,13 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, i1);
        SLTU(rn(t1), rn(t0), r0);
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2706,11 +3446,13 @@ _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (r0 == 0) t1 = t2 */
+    flush();
     w = _jit->pc.w;
     BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     subr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2734,11 +3476,13 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, -i1);
        SLT(rn(t2), rn(t1), r0);
        SLT(rn(t1), r0, rn(t1));
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       flush();
        w = _jit->pc.w;
        BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, -i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2763,10 +3507,13 @@ _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     subr(rn(t0), r0, r1);
     SLTU(rn(t1), r0, rn(t0));
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2784,10 +3531,13 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, -i1);
        SLTU(rn(t1), r0, rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2816,11 +3566,14 @@ _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (t0 == 0) t1 = t2 */
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     subr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2844,11 +3597,14 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, -i1);
        SLT(rn(t2), rn(t1), r0);
        SLT(rn(t1), r0, rn(t1));
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, -i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2873,10 +3629,13 @@ _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     subr(rn(t0), r0, r1);
     SLTU(rn(t1), r0, rn(t0));
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2894,10 +3653,13 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, -i1);
        SLTU(rn(t1), r0, rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2914,12 +3676,14 @@ static jit_word_t
 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
     AND(rn(t0), r0, r1);
+    flush();
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
@@ -2928,14 +3692,14 @@ static jit_word_t
 _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
-
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+    op = pending();
     andi(rn(t0), r0, i1);
+    flush();
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
@@ -2944,12 +3708,14 @@ static jit_word_t
 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
     AND(rn(t0), r0, r1);
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
@@ -2958,14 +3724,14 @@ static jit_word_t
 _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
-
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+    op = pending();
     andi(rn(t0), r0, i1);
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
@@ -2973,78 +3739,112 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 static void
 _callr(jit_state_t *_jit, jit_int32_t r0)
 {
-    JALR(r0);
-    if (r0 != _T9_REGNO)
-       movr(_T9_REGNO, r0);
-    else
-       NOP(1);
+    jit_int32_t                op, t0;
+    if (r0 != _T9_REGNO) {
+       JALR(r0);
+       /* delay slot */
+        movr(_T9_REGNO, r0);
+       flush();
+    }
+    else {
+       /* make sure delay slot does not use r0 */
+       t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                       r0, _ZERO_REGNO);
+       op = pending();
+       JALR(r0);
+       delay(op);
+       if (t0 != JIT_NOREG)
+           jit_unget_reg(t0);
+    }
 }
 
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch)
 {
-    if (((_jit->pc.w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
-        if (can_sign_extend_short_p(i0)) {
-            JAL((i0 & ~0xf0000000) >> 2);
-            addiu(_T9_REGNO, _ZERO_REGNO, i0);
-            return;
-        }
-
-        if (can_zero_extend_short_p(i0)) {
-            JAL((i0 & ~0xf0000000) >> 2);
-            ORI(_T9_REGNO, _ZERO_REGNO, i0);
-            return;
+    jit_int32_t                op, t0;
+    jit_word_t         w, disp;
+    w = _jit->pc.w;
+    if (jit_mips2_p()) {
+       disp = ((i0 - w) >> 2) - 1;
+       if (patch || can_sign_extend_short_p(disp)) {
+           op = pending();
+           BGEZAL(_ZERO_REGNO, disp);  /* Renamed to BAL in mips release 6 */
+           delay(op);
+           goto done;
+       }
+    }
+    assert(!patch);
+    flush();
+    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+       if (can_sign_extend_short_p(i0)) {
+           JAL((i0 & ~0xf0000000) >> 2);
+           /* delay slot */
+           addiu(_T9_REGNO, _ZERO_REGNO, i0);
+       }
+       else if (can_zero_extend_short_p(i0)) {
+           JAL((i0 & ~0xf0000000) >> 2);
+           /* delay slot */
+           ORI(_T9_REGNO, _ZERO_REGNO, i0);
         }
-
-        if (can_sign_extend_int_p(i0)) {
-            if (i0 & 0xffff) {
-                LUI(_T9_REGNO, i0 >> 16);
-                JAL((i0 & ~0xf0000000) >> 2);
-                ORI(_T9_REGNO, _T9_REGNO, i0);
-            } else {
-                JAL((i0 & ~0xf0000000) >> 2);
-                LUI(_T9_REGNO, i0 >> 16);
+       else if (can_sign_extend_int_p(i0)) {
+           if (i0 & 0xffff) {
+               LUI(_T9_REGNO, i0 >> 16);
+               JAL((i0 & ~0xf0000000) >> 2);
+               /* delay slot */
+               ORI(_T9_REGNO, _T9_REGNO, i0);
             }
-            return;
+           else {
+               JAL((i0 & ~0xf0000000) >> 2);
+               /* delay slot */
+               LUI(_T9_REGNO, i0 >> 16);
+           }
         }
+       else
+           goto fallback;
     }
-
-    movi(_T9_REGNO, i0);
-    JALR(_T9_REGNO);
-    NOP(1);
+    else {
+    fallback:
+       /* make sure delay slot does not use _T9_REGNO */
+       t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                       _T9_REGNO, _ZERO_REGNO);
+       /* try to get an instruction before the call */
+       op = pending();
+       movi(_T9_REGNO, i0);
+       JALR(_T9_REGNO);
+       delay(op);
+       if (t0 != JIT_NOREG)
+           jit_unget_reg(t0);
+    }
+    done:
+    return (w);
 }
 
 static jit_word_t
 _calli_p(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         word;
-
+    jit_int32_t                op, t0;
+    /* make sure delay slot does not use _T9_REGNO */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                   _T9_REGNO, _ZERO_REGNO);
+    op = pending();
+    /* implicit flush() */
     word = _jit->pc.w;
     movi_p(_T9_REGNO, i0);
     JALR(_T9_REGNO);
-    NOP(1);
-
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (word);
 }
 
-static jit_int32_t fregs[] = {
-    _F30, _F28, _F26, _F24, _F22, _F20,
-#if !NEW_ABI
-    _F18, _F16,
-#endif
-};
-
-static jit_int32_t iregs[] = {
-    _S7, _S6, _S5, _S4, _S3, _S2, _S1, _S0,
-};
-
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                index;
-    jit_int32_t                offset;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -3063,51 +3863,65 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
 #endif
-    /* callee save registers */
+
 #if NEW_ABI
-    if ((_jitc->function->self.call & jit_call_varargs) &&
-       jit_arg_reg_p(_jitc->function->vagp))
-       subi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
-    else
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+#else
+    /* A frame is always needed because 16 bytes must always be allocated */
+    jit_check_frame();
 #endif
-       subi(_SP_REGNO, _SP_REGNO, stack_framesize);
-    offset = stack_framesize - (sizeof(jit_word_t) << 1);
-    for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
-       if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
-           stxi_d(offset, _SP_REGNO, rn(fregs[index]));
-    }
-    for (index = 0; index < jit_size(iregs);
-        index++, offset -= sizeof(jit_word_t)) {
-       if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
-           stxi(offset, _SP_REGNO, rn(iregs[index]));
-    }
-    assert(offset >= sizeof(jit_word_t));
-    stxi(offset, _SP_REGNO, _RA_REGNO);
-    stxi(0, _SP_REGNO, _BP_REGNO);
-    movr(_BP_REGNO, _SP_REGNO);
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+    if (_jitc->function->need_frame) {
+       stxi(0, _SP_REGNO, _RA_REGNO);
+       stxi(STACK_SLOT, _SP_REGNO, _BP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _SP_REGNO, rn(iregs[reg]));
+           offs += STACK_SLOT;
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       movr(_BP_REGNO, _SP_REGNO);
 
     /* alloca */
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
-       index = jit_get_reg(jit_class_gpr);
-       movi(rn(index), _jitc->function->self.aoff);
-       stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index));
-       jit_unget_reg(index);
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(reg));
+       jit_unget_reg(reg);
     }
 
     if (_jitc->function->self.call & jit_call_varargs) {
+       for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) {
+           offs = jit_framesize() - ((NUM_WORD_ARGS - reg) * STACK_SLOT);
 #if NEW_ABI
-       index = _jitc->function->vagp;
+           SD(rn(_A0 - reg), offs, _BP_REGNO);
 #else
-       index = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
-#endif
-       offset = stack_framesize + index * STACK_SLOT;
-       for (; jit_arg_reg_p(index); ++index, offset += STACK_SLOT) {
-#if NEW_ABI
-           SD(rn(_A0 - index), offset, _BP_REGNO);
-#else
-           stxi(offset +  WORD_ADJUST, _BP_REGNO, rn(_A0 - index));
+           offs += 16 + WORD_ADJUST;
+           stxi(offs, _BP_REGNO, rn(_A0 - reg));
 #endif
        }
     }
@@ -3116,48 +3930,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                index;
-    jit_int32_t                offset;
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
+
+    if (_jitc->function->need_frame) {
+       movr(_SP_REGNO, _BP_REGNO);
+       ldxi(_RA_REGNO, _SP_REGNO, 0);
+       ldxi(_BP_REGNO, _SP_REGNO, STACK_SLOT);
+    }
+
     /* callee save registers */
-    movr(_SP_REGNO, _BP_REGNO);
-    offset = stack_framesize - (sizeof(jit_word_t) << 1);
-    for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
-       if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
-           ldxi_d(rn(fregs[index]), _SP_REGNO, offset);
-    }
-    for (index = 0; index < jit_size(iregs);
-        index++, offset -= sizeof(jit_word_t)) {
-       if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
-           ldxi(rn(iregs[index]), _SP_REGNO, offset);
-    }
-    assert(offset >= sizeof(jit_word_t));
-    ldxi(_RA_REGNO, _SP_REGNO, offset);
-    ldxi(_BP_REGNO, _SP_REGNO, 0);
+    for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
     JR(_RA_REGNO);
     /* delay slot */
-#if NEW_ABI
-    if ((_jitc->function->self.call & jit_call_varargs) &&
-       jit_arg_reg_p(_jitc->function->vagp))
-       addi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       addi(_SP_REGNO, _SP_REGNO, jit_framesize());
     else
-#endif
-       addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+       NOP(1);
+    flush();
 }
 
 static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
     assert(_jitc->function->self.call & jit_call_varargs);
-    /* Initialize va_list to the first stack argument. */
 #if NEW_ABI
+    /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
-       addi(r0, _BP_REGNO, stack_framesize + _jitc->function->vagp *
-            sizeof(jit_int64_t));
+       addi(r0, _BP_REGNO,
+            jit_framesize() -
+            ((NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT));
     else
 #endif
-       addi(r0, _BP_REGNO, _jitc->function->self.size);
+       addi(r0, _BP_REGNO, jit_selfsize());
 }
 
 static void
@@ -3247,16 +4064,31 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            break;
 
        case MIPS_COP1:                 case MIPS_COP2:
-           assert(i.rs.b == MIPS_BC);
-           switch (i.rt.b) {
-               case MIPS_BCF:          case MIPS_BCFL:
-               case MIPS_BCT:          case MIPS_BCTL:
-                   i.is.b = ((label - instr) >> 2) - 1;
-                   u.i[0] = i.op;
-                   break;
-               default:
-                   assert(!"unhandled branch opcode");
-                   break;
+           if (jit_mips6_p()) {
+               switch (i.rs.b) {
+                   case MIPS_BC1EQZ:   case MIPS_BC1NEZ:
+                       assert(jit_mips6_p());
+                       i.is.b = ((label - instr) >> 2) - 1;
+                       u.i[0] = i.op;
+                       break;
+                   default:
+                       assert(!"unhandled branch opcode");
+                       break;
+               }
+           }
+           else {
+               assert(i.rs.b == MIPS_BC);
+               switch (i.rt.b) {
+                   case MIPS_BCF:              case MIPS_BCFL:
+                   case MIPS_BCT:              case MIPS_BCTL:
+                       assert(!jit_mips6_p());
+                       i.is.b = ((label - instr) >> 2) - 1;
+                       u.i[0] = i.op;
+                       break;
+                   default:
+                       assert(!"unhandled branch opcode");
+                       break;
+               }
            }
            break;
 
index 6209fd6..8e3df86 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -27,6 +27,8 @@
 #  define MIPS_fmt_PS                  0x16            /* 2 x float32 */
 #  define MIPS_fmt_S_PU                        0x20
 #  define MIPS_fmt_S_PL                        0x26
+#  define MIPS_condn_S                 0x14            /* release 6 */
+#  define MIPS_condn_D                 0x15            /* release 6 */
 #  define MIPS_ADD_fmt                 0x00
 #  define MIPS_LWXC1                   0x00
 #  define MIPS_SUB_fmt                 0x01
 #  define MIPS_cond_NGE                        0x3d
 #  define MIPS_cond_LE                 0x3e
 #  define MIPS_cond_UGT                        0x3f
+/* Mips release 6 */
+#  define MIPS_cmp_AF                  0x00
+#  define MIPS_cmp_UN                  0x01
+#  define MIPS_cmp_EQ                  0x02
+#  define MIPS_cmp_UEQ                 0x03
+#  define MIPS_cmp_LT                  0x04
+#  define MIPS_cmp_ULT                 0x05
+#  define MIPS_cmp_LE                  0x06
+#  define MIPS_cmp_ULE                 0x07
+#  define MIPS_cmp_SAF                 0x08
+#  define MIPS_cmp_SUN                 0x09
+#  define MIPS_cmp_SEQ                 0x0a
+#  define MIPS_cmp_SUEQ                        0x0b
+#  define MIPS_cmp_SLT                 0x0c
+#  define MIPS_cmp_SULT                        0x0d
+#  define MIPS_cmp_SLE                 0x0e
+#  define MIPS_cmp_SULE                        0x0f
 #  define ADD_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt)
 #  define ADD_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt)
 #  define SUB_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt)
 #  define SQRT_S(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt)
 #  define SQRT_D(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt)
 #  define MFC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0)
+#  define MFHC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_MFH,rt,fs,0,0)
 #  define MTC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0)
+#  define MTHC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_MTH,rt,fs,0,0)
 #  define DMFC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0)
 #  define DMTC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0)
 #  define CVT_D_S(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D)
 #  define MOV_S(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt)
 #  define MOV_D(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt)
 #  define BC1F(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im)
+#  define BC1EQZ(ft,im)                        hrri(MIPS_COP1,MIPS_BC1EQZ,ft,im)
 #  define BC1T(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im)
+#  define BC1NEZ(ft,im)                        hrri(MIPS_COP1,MIPS_BC1NEZ,ft,im)
 #  define C_F_S(fs,ft)                 c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F)
 #  define C_F_D(fs,ft)                 c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F)
 #  define C_F_PS(fs,ft)                        c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F)
 static void
 _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
            jit_int32_t ft, jit_int32_t fs, jit_int32_t cc);
+#  define CMP_AF_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_AF)
+#  define CMP_AF_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_AF)
+#  define CMP_UN_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UN)
+#  define CMP_UN_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UN)
+#  define CMP_EQ_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_EQ)
+#  define CMP_EQ_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_EQ)
+#  define CMP_UEQ_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UEQ)
+#  define CMP_UEQ_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UEQ)
+#  define CMP_LT_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LT)
+#  define CMP_LT_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LT)
+#  define CMP_ULT_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULT)
+#  define CMP_ULT_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULT)
+#  define CMP_LE_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LE)
+#  define CMP_LE_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LE)
+#  define CMP_ULE_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULE)
+#  define CMP_ULE_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULE)
+#  define CMP_SAF_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SAF)
+#  define CMP_SAF_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SAF)
+#  define CMP_SUN_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUN)
+#  define CMP_SUN_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUN)
+#  define CMP_SEQ_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SEQ)
+#  define CMP_SEQ_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SEQ)
+#  define CMP_SUEQ_S(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUEQ)
+#  define CMP_SUEQ_D(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUEQ)
+#  define CMP_SLT_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLT)
+#  define CMP_SLT_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLT)
+#  define CMP_SULT_S(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULT)
+#  define CMP_SULT_D(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULT)
+#  define CMP_SLE_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLE)
+#  define CMP_SLE_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLE)
+#  define CMP_SULE_S(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULE)
+#  define CMP_SULE_D(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULE)
+#  define cmp_cond_fmt(fm,fd,ft,fs,cn) _cmp_cond_fmt(_jit,fm,fd,ft,fs,cn)
+static void
+_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd,
+             jit_int32_t ft, jit_int32_t fs, jit_int32_t cn);
 #  define addr_f(r0,r1,r2)             ADD_S(r0,r1,r2)
 #  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
 static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
@@ -220,7 +279,7 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
 #  define sqrtr_f(r0,r1)               SQRT_S(r0,r1)
 #  define sqrtr_d(r0,r1)               SQRT_D(r0,r1)
 #  define movr_w_f(r0, r1)             MTC1(r1, r0)
-#  define movr_f_w(r0, r1)             MFC1(r1, r0)
+#  define movr_f_w(r0, r1)             MFC1(r0, r1)
 #  define movi_f_w(r0, i0)             _movi_f_w(_jit, r0, i0)
 static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*);
 #  define extr_f(r0, r1)               _extr_f(_jit, r0, r1)
@@ -565,7 +624,22 @@ _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
     i.ft.b = ft;
     i.fm.b = fm;
     i.hc.b = MIPS_COP1;
-    ii(i.op);
+    instr(i.op);
+}
+
+static void
+_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd,
+             jit_int32_t ft, jit_int32_t fs, jit_int32_t cn)
+{
+    jit_instr_t                i;
+    i.op = 0;          /* must have bit 6 zeroed */
+    i.cn.b = cn;
+    i.ft.b = ft;
+    i.fs.b = fs;
+    i.fd.b = fd;
+    i.fm.b = fm;
+    i.hc.b = MIPS_COP1;
+    instr(i.op);
 }
 
 #  define fpr_opi(name, type, size)                                    \
@@ -829,16 +903,28 @@ static void
 _movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     assert(r1 == r2 - 1);
-    MTC1(r1, r0 + BE_P);
-    MTC1(r2, r0 + LE_P);
+    if (jit_mips6_p()) {
+       MTC1(r1, r0);
+       MTHC1(r2, r0);
+    }
+    else {
+       MTC1(r1, r0 + BE_P);
+       MTC1(r2, r0 + LE_P);
+    }
 }
 
 static void
 _movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     assert(r0 == r1 - 1);
-    MFC1(r0, r2 + BE_P);
-    MFC1(r1, r2 + LE_P);
+    if (jit_mips6_p()) {
+       MFC1(r0, r2);
+       MFHC1(r1, r2);
+    }
+    else {
+       MFC1(r0, r2 + BE_P);
+       MFC1(r1, r2 + LE_P);
+    }
 }
 
 static void
@@ -896,40 +982,40 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-#  if __WORDSIZE == 64 || NEW_ABI
-    LDC1(r0, 0, r1);
-#  else
-    LWC1(r0 + BE_P, 0, r1);
-    LWC1(r0 + LE_P, 4, r1);
-#  endif
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI)
+       LDC1(r0, 0, r1);
+    else {
+       LWC1(r0 + BE_P, 0, r1);
+       LWC1(r0 + LE_P, 4, r1);
+    }
 }
 
 static void
 _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       LDC1(r0, i0, _ZERO_REGNO);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LDC1(r0, 0, rn(reg));
-       jit_unget_reg(reg);
-    }
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       LWC1(r0 + BE_P, i0, _ZERO_REGNO);
-       LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           LDC1(r0, i0, _ZERO_REGNO);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDC1(r0, 0, rn(reg));
+           jit_unget_reg(reg);
+       }
     }
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LWC1(r0 + BE_P, 0, rn(reg));
-       LWC1(r0 + LE_P, 4, rn(reg));
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           LWC1(r0 + BE_P, i0, _ZERO_REGNO);
+           LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           ldr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
     }
-#  endif
 }
 
 static void
@@ -946,52 +1032,60 @@ static void
 _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       LDC1(r0, i0, r1);
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       LWC1(r0 + BE_P, i0, r1);
-       LWC1(r0 + LE_P, i0 + 4, r1);
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           LDC1(r0, i0, r1);
+       else
+           goto fallback;
     }
-#  endif
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_d(r0, rn(reg));
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           LWC1(r0 + BE_P, i0, r1);
+           LWC1(r0 + LE_P, i0 + 4, r1);
+       }
+       else {
+       fallback:
+           reg = jit_get_reg(jit_class_gpr);
+           addi(rn(reg), r1, i0);
+           ldr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
     }
 }
 
 static void
 _str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1)
 {
-#  if __WORDSIZE == 64 || NEW_ABI
-    SDC1(r1, 0, r0);
-#  else
-    SWC1(r1 + BE_P, 0, r0);
-    SWC1(r1 + LE_P, 4, r0);
-#  endif
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI)
+       SDC1(r1, 0, r0);
+    else {
+       SWC1(r1 + BE_P, 0, r0);
+       SWC1(r1 + LE_P, 4, r0);
+    }
 }
 
 static void
 _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       SDC1(r0, i0, _ZERO_REGNO);
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       SWC1(r0 + BE_P, i0, _ZERO_REGNO);
-       SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+    if (jit_mips6_p() ||  __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           SDC1(r0, i0, _ZERO_REGNO);
+       else
+           goto fallback;
     }
-#  endif
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       str_d(rn(reg), r0);
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           SWC1(r0 + BE_P, i0, _ZERO_REGNO);
+           SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+       }
+       else {
+       fallback:
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           str_d(rn(reg), r0);
+           jit_unget_reg(reg);
+       }
     }
 }
 
@@ -1009,20 +1103,24 @@ static void
 _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       SDC1(r1, i0, r0);
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       SWC1(r1 + BE_P, i0, r0);
-       SWC1(r1 + LE_P, i0 + 4, r0);
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           SDC1(r1, i0, r0);
+       else
+           goto fallback;
     }
-#  endif
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r0, i0);
-       str_d(rn(reg), r1);
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           SWC1(r1 + BE_P, i0, r0);
+           SWC1(r1 + LE_P, i0 + 4, r0);
+       }
+       else {
+       fallback:
+           reg = jit_get_reg(jit_class_gpr);
+           addi(rn(reg), r0, i0);
+           str_d(rn(reg), r1);
+           jit_unget_reg(reg);
+       }
     }
 }
 
@@ -1058,30 +1156,49 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
     else
        DMTC1(_ZERO_REGNO, r0);
 #  else
-    if (_jitc->no_data)
-       reg = jit_get_reg(jit_class_gpr);
-    if (data.i[0]) {
+    if (jit_mips6_p()) {
        if (_jitc->no_data) {
-           movi(rn(reg), data.i[0]);
-           MTC1(rn(reg), r0 + BE_P);
+           reg = jit_get_reg(jit_class_gpr);
+#  if __WORDSIZE == 64
+           movi(rn(reg), data.l);
+           DMTC1(rn(reg), r0);
+#  else
+           movi(rn(reg), data.i[0 + BE_P]);
+           MTC1(rn(reg), r0);
+           movi(rn(reg), data.i[0 + LE_P]);
+           MTHC1(rn(reg), r0);
+#  endif
+           jit_unget_reg(reg);
        }
        else
-           ldi_f(r0 + BE_P, (jit_word_t)i0);
+           ldi_d(r0, (jit_word_t)i0);
     }
-    else
-       MTC1(_ZERO_REGNO, r0 + BE_P);
-    if (data.i[1]) {
-       if (_jitc->no_data) {
-           movi(rn(reg), data.i[1]);
-           MTC1(rn(reg), r0 + LE_P);
+    else {
+       if (_jitc->no_data)
+           reg = jit_get_reg(jit_class_gpr);
+       if (data.i[0]) {
+           if (_jitc->no_data) {
+               movi(rn(reg), data.i[0]);
+               MTC1(rn(reg), r0 + BE_P);
+           }
+           else
+               ldi_f(r0 + BE_P, (jit_word_t)i0);
        }
        else
-           ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+           MTC1(_ZERO_REGNO, r0 + BE_P);
+       if (data.i[1]) {
+           if (_jitc->no_data) {
+               movi(rn(reg), data.i[1]);
+               MTC1(rn(reg), r0 + LE_P);
+           }
+           else
+               ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+       }
+       else
+           MTC1(_ZERO_REGNO, r0 + LE_P);
+       if (_jitc->no_data)
+           jit_unget_reg(reg);
     }
-    else
-       MTC1(_ZERO_REGNO, r0 + LE_P);
-    if (_jitc->no_data)
-       jit_unget_reg(reg);
 #  endif
 }
 
@@ -1089,13 +1206,26 @@ static void
 _ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(lt)
 
@@ -1103,13 +1233,26 @@ static void
 _ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(le)
 
@@ -1117,13 +1260,26 @@ static void
 _eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_EQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(eq)
 
@@ -1131,13 +1287,26 @@ static void
 _ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ge)
 
@@ -1145,13 +1314,26 @@ static void
 _gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(gt)
 
@@ -1159,13 +1341,26 @@ static void
 _ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_EQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ne)
 
@@ -1173,13 +1368,26 @@ static void
 _unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unlt)
 
@@ -1187,13 +1395,26 @@ static void
 _unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unle)
 
@@ -1201,13 +1422,26 @@ static void
 _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(uneq)
 
@@ -1215,13 +1449,26 @@ static void
 _unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unge)
 
@@ -1229,13 +1476,26 @@ static void
 _ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ungt)
 
@@ -1243,13 +1503,26 @@ static void
 _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ltgt)
 
@@ -1257,13 +1530,26 @@ static void
 _ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UN_S(r1, r2);
+       flush();
+       /* cannot optimize delay slot */
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ord)
 
@@ -1271,13 +1557,26 @@ static void
 _unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UN_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unord)
 
@@ -1285,10 +1584,25 @@ static jit_word_t
 _bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(lt)
@@ -1297,10 +1611,25 @@ static jit_word_t
 _bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(le)
@@ -1309,10 +1638,25 @@ static jit_word_t
 _beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_EQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(eq)
@@ -1321,10 +1665,25 @@ static jit_word_t
 _bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ge)
@@ -1333,10 +1692,25 @@ static jit_word_t
 _bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(gt)
@@ -1345,10 +1719,25 @@ static jit_word_t
 _bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_EQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ne)
@@ -1357,10 +1746,25 @@ static jit_word_t
 _bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unlt)
@@ -1369,10 +1773,25 @@ static jit_word_t
 _bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unle)
@@ -1381,10 +1800,25 @@ static jit_word_t
 _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UEQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(uneq)
@@ -1393,10 +1827,25 @@ static jit_word_t
 _bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unge)
@@ -1405,10 +1854,25 @@ static jit_word_t
 _bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ungt)
@@ -1417,10 +1881,25 @@ static jit_word_t
 _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UEQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ltgt)
@@ -1429,10 +1908,25 @@ static jit_word_t
 _bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UN_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ord)
@@ -1441,10 +1935,25 @@ static jit_word_t
 _bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UN_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unord)
@@ -1453,13 +1962,26 @@ static void
 _ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(lt)
 
@@ -1467,13 +1989,26 @@ static void
 _ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(le)
 
@@ -1481,13 +2016,26 @@ static void
 _eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_EQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(eq)
 
@@ -1495,13 +2043,26 @@ static void
 _ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ge)
 
@@ -1509,13 +2070,26 @@ static void
 _gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(gt)
 
@@ -1523,13 +2097,26 @@ static void
 _ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_EQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ne)
 
@@ -1537,13 +2124,26 @@ static void
 _unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unlt)
 
@@ -1551,13 +2151,26 @@ static void
 _unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unle)
 
@@ -1565,13 +2178,26 @@ static void
 _uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(uneq)
 
@@ -1579,13 +2205,26 @@ static void
 _unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unge)
 
@@ -1593,13 +2232,26 @@ static void
 _ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ungt)
 
@@ -1607,13 +2259,26 @@ static void
 _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ltgt)
 
@@ -1621,13 +2286,26 @@ static void
 _ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UN_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ord)
 
@@ -1635,13 +2313,26 @@ static void
 _unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UN_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unord)
 
@@ -1649,10 +2340,25 @@ static jit_word_t
 _bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the LT compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_OLT_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_OLT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(lt)
@@ -1661,10 +2367,25 @@ static jit_word_t
 _bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the LE compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_OLE_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_OLE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(le)
@@ -1673,10 +2394,25 @@ static jit_word_t
 _beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the EQ compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_EQ_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_EQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(eq)
@@ -1685,10 +2421,25 @@ static jit_word_t
 _bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* ge: taken when the ULT compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_ULT_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_ULT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ge)
@@ -1697,10 +2448,25 @@ static jit_word_t
 _bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* gt: taken when the ULE compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_ULE_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_ULE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(gt)
@@ -1709,10 +2475,25 @@ static jit_word_t
 _bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* ne: taken when the EQ compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_EQ_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_EQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ne)
@@ -1721,10 +2502,25 @@ static jit_word_t
 _bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the ULT compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_ULT_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_ULT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unlt)
@@ -1733,10 +2529,25 @@ static jit_word_t
 _bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the ULE compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_ULE_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_ULE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unle)
@@ -1745,10 +2556,25 @@ static jit_word_t
 _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the UEQ compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_UEQ_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_UEQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(uneq)
@@ -1757,10 +2583,25 @@ static jit_word_t
 _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* unge: taken when the LT compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_OLT_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_OLT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unge)
@@ -1769,10 +2610,25 @@ static jit_word_t
 _bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* ungt: taken when the LE compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_OLE_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_OLE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ungt)
@@ -1781,10 +2637,25 @@ static jit_word_t
 _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* ltgt: taken when the UEQ compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_UEQ_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_UEQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ltgt)
@@ -1793,10 +2664,25 @@ static jit_word_t
 _bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);  /* ord: taken when the UN compare did not hold; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_UN_D + BC1F, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_UN_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ord)
@@ -1805,10 +2691,25 @@ static jit_word_t
 _bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;       /* op: value of pending(), handed to delay() after the branch */
+    if (jit_mips6_p()) {       /* compare result goes to FPR reg, branch tests reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;         /* pc at which the branch is emitted; returned to the caller */
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);  /* taken when the UN compare held; offset is in words from the next instruction */
+    }
+    else {                     /* otherwise: C_UN_D + BC1T, neither of which names reg */
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2); /* NOTE(review): reg is not an operand below -- confirm why it is reserved */
+       op = pending();
+       C_UN_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);                 /* NOTE(review): stands where the removed NOP(1) was; presumably fills the delay slot -- confirm */
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unord)
index 91deb4b..0a7436d 100644 (file)
 
 #if __WORDSIZE == 32
-#if NEW_ABI
-#define JIT_INSTR_MAX 52
-    0, /* data */
-    0, /* live */
-    0, /* align */
-    0, /* save */
-    0, /* load */
-    0, /* #name */
-    0, /* #note */
-    0, /* label */
-    44,        /* prolog */
-    0, /* ellipsis */
-    0, /* va_push */
-    0, /* allocai */
-    0, /* allocar */
-    0, /* arg */
-    0, /* getarg_c */
-    0, /* getarg_uc */
-    0, /* getarg_s */
-    0, /* getarg_us */
-    0, /* getarg_i */
-    0, /* getarg_ui */
-    0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    0, /* va_start */
-    0, /* va_arg */
-    0, /* va_arg_d */
-    0, /* va_end */
-    4, /* addr */
-    12,        /* addi */
-    12,        /* addcr */
-    20,        /* addci */
-    28,        /* addxr */
-    28,        /* addxi */
-    4, /* subr */
-    12,        /* subi */
-    12,        /* subcr */
-    20,        /* subci */
-    28,        /* subxr */
-    28,        /* subxi */
-    16,        /* rsbi */
-    4, /* mulr */
-    12,        /* muli */
-    12,        /* qmulr */
-    20,        /* qmuli */
-    12,        /* qmulr_u */
-    20,        /* qmuli_u */
-    8, /* divr */
-    16,        /* divi */
-    8, /* divr_u */
-    16,        /* divi_u */
-    12,        /* qdivr */
-    16,        /* qdivi */
-    12,        /* qdivr_u */
-    16,        /* qdivi_u */
-    8, /* remr */
-    16,        /* remi */
-    8, /* remr_u */
-    16,        /* remi_u */
-    4, /* andr */
-    12,        /* andi */
-    4, /* orr */
-    12,        /* ori */
-    4, /* xorr */
-    12,        /* xori */
-    4, /* lshr */
-    4, /* lshi */
-    4, /* rshr */
-    4, /* rshi */
-    4, /* rshr_u */
-    4, /* rshi_u */
-    4, /* negr */
-    8, /* comr */
-    4, /* ltr */
-    4, /* lti */
-    4, /* ltr_u */
-    4, /* lti_u */
-    8, /* ler */
-    12,        /* lei */
-    8, /* ler_u */
-    12,        /* lei_u */
-    12,        /* eqr */
-    12,        /* eqi */
-    8, /* ger */
-    12,        /* gei */
-    8, /* ger_u */
-    12,        /* gei_u */
-    4, /* gtr */
-    8, /* gti */
-    4, /* gtr_u */
-    8, /* gti_u */
-    8, /* ner */
-    8, /* nei */
-    4, /* movr */
-    8, /* movi */
-    4, /* movnr */
-    4, /* movzr */
-    8, /* extr_c */
-    4, /* extr_uc */
-    8, /* extr_s */
-    4, /* extr_us */
-    0, /* extr_i */
-    0, /* extr_ui */
-    4, /* htonr_us */
-    4, /* htonr_ui */
-    0, /* htonr_ul */
-    4, /* ldr_c */
-    12,        /* ldi_c */
-    4, /* ldr_uc */
-    12,        /* ldi_uc */
-    4, /* ldr_s */
-    12,        /* ldi_s */
-    4, /* ldr_us */
-    12,        /* ldi_us */
-    4, /* ldr_i */
-    12,        /* ldi_i */
-    0, /* ldr_ui */
-    0, /* ldi_ui */
-    0, /* ldr_l */
-    0, /* ldi_l */
-    8, /* ldxr_c */
-    4, /* ldxi_c */
-    8, /* ldxr_uc */
-    4, /* ldxi_uc */
-    8, /* ldxr_s */
-    4, /* ldxi_s */
-    8, /* ldxr_us */
-    4, /* ldxi_us */
-    8, /* ldxr_i */
-    4, /* ldxi_i */
-    0, /* ldxr_ui */
-    0, /* ldxi_ui */
-    0, /* ldxr_l */
-    0, /* ldxi_l */
-    4, /* str_c */
-    12,        /* sti_c */
-    4, /* str_s */
-    12,        /* sti_s */
-    4, /* str_i */
-    12,        /* sti_i */
-    0, /* str_l */
-    0, /* sti_l */
-    8, /* stxr_c */
-    4, /* stxi_c */
-    8, /* stxr_s */
-    4, /* stxi_s */
-    8, /* stxr_i */
-    4, /* stxi_i */
-    0, /* stxr_l */
-    0, /* stxi_l */
-    12,        /* bltr */
-    12,        /* blti */
-    12,        /* bltr_u */
-    12,        /* blti_u */
-    12,        /* bler */
-    16,        /* blei */
-    12,        /* bler_u */
-    16,        /* blei_u */
-    8, /* beqr */
-    16,        /* beqi */
-    12,        /* bger */
-    12,        /* bgei */
-    12,        /* bger_u */
-    12,        /* bgei_u */
-    12,        /* bgtr */
-    16,        /* bgti */
-    12,        /* bgtr_u */
-    16,        /* bgti_u */
-    8, /* bner */
-    16,        /* bnei */
-    12,        /* bmsr */
-    12,        /* bmsi */
-    12,        /* bmcr */
-    12,        /* bmci */
-    28,        /* boaddr */
-    28,        /* boaddi */
-    16,        /* boaddr_u */
-    20,        /* boaddi_u */
-    28,        /* bxaddr */
-    28,        /* bxaddi */
-    16,        /* bxaddr_u */
-    20,        /* bxaddi_u */
-    28,        /* bosubr */
-    28,        /* bosubi */
-    16,        /* bosubr_u */
-    20,        /* bosubi_u */
-    28,        /* bxsubr */
-    28,        /* bxsubi */
-    16,        /* bxsubr_u */
-    20,        /* bxsubi_u */
-    0, /* jmpr */
-    8, /* jmpi */
-    12,        /* callr */
-    16,        /* calli */
-    0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
-    0, /* finishr */
-    0, /* finishi */
-    0, /* ret */
-    0, /* retr */
-    0, /* reti */
-    0, /* retval_c */
-    0, /* retval_uc */
-    0, /* retval_s */
-    0, /* retval_us */
-    0, /* retval_i */
-    0, /* retval_ui */
-    0, /* retval_l */
-    44,        /* epilog */
-    0, /* arg_f */
-    0, /* getarg_f */
-    0, /* putargr_f */
-    0, /* putargi_f */
-    4, /* addr_f */
-    16,        /* addi_f */
-    4, /* subr_f */
-    16,        /* subi_f */
-    16,        /* rsbi_f */
-    4, /* mulr_f */
-    16,        /* muli_f */
-    4, /* divr_f */
-    16,        /* divi_f */
-    4, /* negr_f */
-    4, /* absr_f */
-    4, /* sqrtr_f */
-    16,        /* ltr_f */
-    28,        /* lti_f */
-    16,        /* ler_f */
-    28,        /* lei_f */
-    16,        /* eqr_f */
-    28,        /* eqi_f */
-    16,        /* ger_f */
-    28,        /* gei_f */
-    16,        /* gtr_f */
-    28,        /* gti_f */
-    16,        /* ner_f */
-    28,        /* nei_f */
-    16,        /* unltr_f */
-    28,        /* unlti_f */
-    16,        /* unler_f */
-    28,        /* unlei_f */
-    16,        /* uneqr_f */
-    28,        /* uneqi_f */
-    16,        /* unger_f */
-    28,        /* ungei_f */
-    16,        /* ungtr_f */
-    28,        /* ungti_f */
-    16,        /* ltgtr_f */
-    28,        /* ltgti_f */
-    16,        /* ordr_f */
-    28,        /* ordi_f */
-    16,        /* unordr_f */
-    28,        /* unordi_f */
-    8, /* truncr_f_i */
-    0, /* truncr_f_l */
-    8, /* extr_f */
-    4, /* extr_d_f */
-    4, /* movr_f */
-    12,        /* movi_f */
-    4, /* ldr_f */
-    12,        /* ldi_f */
-    8, /* ldxr_f */
-    4, /* ldxi_f */
-    4, /* str_f */
-    12,        /* sti_f */
-    8, /* stxr_f */
-    4, /* stxi_f */
-    12,        /* bltr_f */
-    24,        /* blti_f */
-    12,        /* bler_f */
-    24,        /* blei_f */
-    12,        /* beqr_f */
-    24,        /* beqi_f */
-    12,        /* bger_f */
-    24,        /* bgei_f */
-    12,        /* bgtr_f */
-    24,        /* bgti_f */
-    12,        /* bner_f */
-    24,        /* bnei_f */
-    12,        /* bunltr_f */
-    24,        /* bunlti_f */
-    12,        /* bunler_f */
-    24,        /* bunlei_f */
-    12,        /* buneqr_f */
-    24,        /* buneqi_f */
-    12,        /* bunger_f */
-    24,        /* bungei_f */
-    12,        /* bungtr_f */
-    24,        /* bungti_f */
-    12,        /* bltgtr_f */
-    24,        /* bltgti_f */
-    12,        /* bordr_f */
-    24,        /* bordi_f */
-    12,        /* bunordr_f */
-    24,        /* bunordi_f */
-    0, /* pushargr_f */
-    0, /* pushargi_f */
-    0, /* retr_f */
-    0, /* reti_f */
-    0, /* retval_f */
-    0, /* arg_d */
-    0, /* getarg_d */
-    0, /* putargr_d */
-    0, /* putargi_d */
-    4, /* addr_d */
-    16,        /* addi_d */
-    4, /* subr_d */
-    16,        /* subi_d */
-    16,        /* rsbi_d */
-    4, /* mulr_d */
-    16,        /* muli_d */
-    4, /* divr_d */
-    16,        /* divi_d */
-    4, /* negr_d */
-    4, /* absr_d */
-    4, /* sqrtr_d */
-    16,        /* ltr_d */
-    28,        /* lti_d */
-    16,        /* ler_d */
-    28,        /* lei_d */
-    16,        /* eqr_d */
-    28,        /* eqi_d */
-    16,        /* ger_d */
-    28,        /* gei_d */
-    16,        /* gtr_d */
-    28,        /* gti_d */
-    16,        /* ner_d */
-    28,        /* nei_d */
-    16,        /* unltr_d */
-    28,        /* unlti_d */
-    16,        /* unler_d */
-    28,        /* unlei_d */
-    16,        /* uneqr_d */
-    28,        /* uneqi_d */
-    16,        /* unger_d */
-    28,        /* ungei_d */
-    16,        /* ungtr_d */
-    28,        /* ungti_d */
-    16,        /* ltgtr_d */
-    28,        /* ltgti_d */
-    16,        /* ordr_d */
-    28,        /* ordi_d */
-    16,        /* unordr_d */
-    28,        /* unordi_d */
-    8, /* truncr_d_i */
-    0, /* truncr_d_l */
-    8, /* extr_d */
-    4, /* extr_f_d */
-    4, /* movr_d */
-    12,        /* movi_d */
-    4, /* ldr_d */
-    12,        /* ldi_d */
-    8, /* ldxr_d */
-    4, /* ldxi_d */
-    4, /* str_d */
-    12,        /* sti_d */
-    8, /* stxr_d */
-    4, /* stxi_d */
-    12,        /* bltr_d */
-    24,        /* blti_d */
-    12,        /* bler_d */
-    24,        /* blei_d */
-    12,        /* beqr_d */
-    24,        /* beqi_d */
-    12,        /* bger_d */
-    24,        /* bgei_d */
-    12,        /* bgtr_d */
-    24,        /* bgti_d */
-    12,        /* bner_d */
-    24,        /* bnei_d */
-    12,        /* bunltr_d */
-    24,        /* bunlti_d */
-    12,        /* bunler_d */
-    24,        /* bunlei_d */
-    12,        /* buneqr_d */
-    24,        /* buneqi_d */
-    12,        /* bunger_d */
-    24,        /* bungei_d */
-    12,        /* bungtr_d */
-    24,        /* bungti_d */
-    12,        /* bltgtr_d */
-    24,        /* bltgti_d */
-    12,        /* bordr_d */
-    24,        /* bordi_d */
-    12,        /* bunordr_d */
-    24,        /* bunordi_d */
-    0, /* pushargr_d */
-    0, /* pushargi_d */
-    0, /* retr_d */
-    0, /* reti_d */
-    0, /* retval_d */
-    0, /* movr_w_f */
-    0, /* movr_ww_d */
-    0, /* movr_w_d */
-    0, /* movr_f_w */
-    0, /* movi_f_w */
-    0, /* movr_d_ww */
-    0, /* movi_d_ww */
-    4, /* movr_d_w */
-    12,        /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* NEW_ABI */
-#endif /* __WORDSIZE */
-
-#if __WORDSIZE == 32
-#if !NEW_ABI
 #define JIT_INSTR_MAX 116
     0, /* data */
     0, /* live */
-    0, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     20,        /* va_arg_d */
     8, /* remr_u */
     16,        /* remi_u */
     4, /* andr */
-    12,        /* andi */
+    8, /* andi */
     4, /* orr */
     12,        /* ori */
     4, /* xorr */
     4, /* ltr_u */
     4, /* lti_u */
     8, /* ler */
-    12,        /* lei */
+    4, /* lei */
     8, /* ler_u */
-    12,        /* lei_u */
-    12,        /* eqr */
-    12,        /* eqi */
+    4, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
     8, /* ger */
-    12,        /* gei */
+    8, /* gei */
     8, /* ger_u */
-    12,        /* gei_u */
+    8, /* gei_u */
     4, /* gtr */
     8, /* gti */
     4, /* gtr_u */
     8, /* movi */
     4, /* movnr */
     4, /* movzr */
-    8, /* extr_c */
+    36,        /* casr */
+    44,        /* casi */
+    4, /* extr_c */
     4, /* extr_uc */
-    8, /* extr_s */
+    4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
-    20,        /* htonr_us */
-    52,        /* htonr_ui */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    0, /* bswapr_ul */
+    4, /* htonr_us */
+    4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* ldr_c */
     12,        /* ldi_c */
     20,        /* bxsubi_u */
     8, /* jmpr */
     8, /* jmpi */
-    12,        /* callr */
+    8, /* callr */
     16,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     8, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* NEW_ABI */
+    8, /* clo */
+    8, /* clz */
+    76,        /* cto */
+    76,        /* ctz */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 76
     0, /* data */
     0, /* live */
-    4, /* align */
+    24,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    44,        /* prolog */
+    76,        /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    0, /* va_start */
-    0, /* va_arg */
-    0, /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     28,        /* addi */
     36,        /* subci */
     28,        /* subxr */
     28,        /* subxi */
-    32,        /* rsbi */
+    36,        /* rsbi */
     8, /* mulr */
     32,        /* muli */
     12,        /* qmulr */
     8, /* remr_u */
     32,        /* remi_u */
     4, /* andr */
-    28,        /* andi */
+    8, /* andi */
     4, /* orr */
     28,        /* ori */
     4, /* xorr */
     4, /* ltr_u */
     4, /* lti_u */
     8, /* ler */
-    12,        /* lei */
+    4, /* lei */
     8, /* ler_u */
-    12,        /* lei_u */
-    12,        /* eqr */
-    12,        /* eqi */
+    4, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
     8, /* ger */
-    12,        /* gei */
+    8, /* gei */
     8, /* ger_u */
-    12,        /* gei_u */
+    8, /* gei_u */
     4, /* gtr */
     8, /* gti */
     4, /* gtr_u */
     28,        /* movi */
     4, /* movnr */
     4, /* movzr */
-    8, /* extr_c */
+    36,        /* casr */
+    56,        /* casi */
+    4, /* extr_c */
     4, /* extr_uc */
-    8, /* extr_s */
+    4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
-    8, /* extr_ui */
+    4, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
-    12,        /* ldi_c */
+    24,        /* ldi_c */
     4, /* ldr_uc */
-    12,        /* ldi_uc */
+    24,        /* ldi_uc */
     4, /* ldr_s */
-    12,        /* ldi_s */
+    24,        /* ldi_s */
     4, /* ldr_us */
-    12,        /* ldi_us */
+    24,        /* ldi_us */
     4, /* ldr_i */
-    12,        /* ldi_i */
+    24,        /* ldi_i */
     4, /* ldr_ui */
-    12,        /* ldi_ui */
+    24,        /* ldi_ui */
     4, /* ldr_l */
-    12,        /* ldi_l */
+    24,        /* ldi_l */
     8, /* ldxr_c */
-    4, /* ldxi_c */
+    16,        /* ldxi_c */
     8, /* ldxr_uc */
-    4, /* ldxi_uc */
+    16,        /* ldxi_uc */
     8, /* ldxr_s */
-    4, /* ldxi_s */
+    16,        /* ldxi_s */
     8, /* ldxr_us */
-    4, /* ldxi_us */
+    16,        /* ldxi_us */
     8, /* ldxr_i */
-    4, /* ldxi_i */
+    16,        /* ldxi_i */
     8, /* ldxr_ui */
-    4, /* ldxi_ui */
+    16,        /* ldxi_ui */
     8, /* ldxr_l */
-    4, /* ldxi_l */
+    16,        /* ldxi_l */
     4, /* str_c */
-    12,        /* sti_c */
+    24,        /* sti_c */
     4, /* str_s */
-    12,        /* sti_s */
+    24,        /* sti_s */
     4, /* str_i */
-    12,        /* sti_i */
+    24,        /* sti_i */
     4, /* str_l */
-    12,        /* sti_l */
+    24,        /* sti_l */
     8, /* stxr_c */
-    4, /* stxi_c */
+    16,        /* stxi_c */
     8, /* stxr_s */
-    4, /* stxi_s */
+    16,        /* stxi_s */
     8, /* stxr_i */
-    4, /* stxi_i */
+    16,        /* stxi_i */
     8, /* stxr_l */
-    4, /* stxi_l */
+    16,        /* stxi_l */
     12,        /* bltr */
     12,        /* blti */
     12,        /* bltr_u */
     12,        /* bgtr_u */
     16,        /* bgti_u */
     8, /* bner */
-    32,        /* bnei */
+    28,        /* bnei */
     12,        /* bmsr */
     12,        /* bmsi */
     12,        /* bmcr */
     28,        /* bxsubi */
     16,        /* bxsubr_u */
     20,        /* bxsubi_u */
-    0, /* jmpr */
+    8, /* jmpr */
     8, /* jmpi */
-    12,        /* callr */
+    8, /* callr */
     32,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
-    44,        /* epilog */
+    76,        /* epilog */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     0, /* putargi_f */
     4, /* addr_f */
-    16,        /* addi_f */
+    28,        /* addi_f */
     4, /* subr_f */
-    16,        /* subi_f */
-    16,        /* rsbi_f */
+    28,        /* subi_f */
+    28,        /* rsbi_f */
     4, /* mulr_f */
-    16,        /* muli_f */
+    28,        /* muli_f */
     4, /* divr_f */
-    16,        /* divi_f */
+    28,        /* divi_f */
     4, /* negr_f */
     4, /* absr_f */
     4, /* sqrtr_f */
     16,        /* ltr_f */
-    28,        /* lti_f */
+    40,        /* lti_f */
     16,        /* ler_f */
-    28,        /* lei_f */
+    40,        /* lei_f */
     16,        /* eqr_f */
-    28,        /* eqi_f */
+    40,        /* eqi_f */
     16,        /* ger_f */
-    28,        /* gei_f */
+    40,        /* gei_f */
     16,        /* gtr_f */
-    28,        /* gti_f */
+    40,        /* gti_f */
     16,        /* ner_f */
-    28,        /* nei_f */
+    40,        /* nei_f */
     16,        /* unltr_f */
-    28,        /* unlti_f */
+    40,        /* unlti_f */
     16,        /* unler_f */
-    28,        /* unlei_f */
+    40,        /* unlei_f */
     16,        /* uneqr_f */
-    28,        /* uneqi_f */
+    40,        /* uneqi_f */
     16,        /* unger_f */
-    28,        /* ungei_f */
+    40,        /* ungei_f */
     16,        /* ungtr_f */
-    28,        /* ungti_f */
+    40,        /* ungti_f */
     16,        /* ltgtr_f */
-    28,        /* ltgti_f */
+    40,        /* ltgti_f */
     16,        /* ordr_f */
-    28,        /* ordi_f */
+    40,        /* ordi_f */
     16,        /* unordr_f */
-    28,        /* unordi_f */
+    40,        /* unordi_f */
     8, /* truncr_f_i */
     8, /* truncr_f_l */
     8, /* extr_f */
     4, /* extr_d_f */
     4, /* movr_f */
-    12,        /* movi_f */
+    24,        /* movi_f */
     4, /* ldr_f */
-    12,        /* ldi_f */
+    24,        /* ldi_f */
     8, /* ldxr_f */
-    4, /* ldxi_f */
+    16,        /* ldxi_f */
     4, /* str_f */
-    12,        /* sti_f */
+    24,        /* sti_f */
     8, /* stxr_f */
-    4, /* stxi_f */
+    16,        /* stxi_f */
     12,        /* bltr_f */
-    24,        /* blti_f */
+    36,        /* blti_f */
     12,        /* bler_f */
-    24,        /* blei_f */
+    36,        /* blei_f */
     12,        /* beqr_f */
-    24,        /* beqi_f */
+    36,        /* beqi_f */
     12,        /* bger_f */
-    24,        /* bgei_f */
+    36,        /* bgei_f */
     12,        /* bgtr_f */
-    24,        /* bgti_f */
+    36,        /* bgti_f */
     12,        /* bner_f */
-    24,        /* bnei_f */
+    36,        /* bnei_f */
     12,        /* bunltr_f */
-    24,        /* bunlti_f */
+    36,        /* bunlti_f */
     12,        /* bunler_f */
-    24,        /* bunlei_f */
+    36,        /* bunlei_f */
     12,        /* buneqr_f */
-    24,        /* buneqi_f */
+    36,        /* buneqi_f */
     12,        /* bunger_f */
-    24,        /* bungei_f */
+    36,        /* bungei_f */
     12,        /* bungtr_f */
-    24,        /* bungti_f */
+    36,        /* bungti_f */
     12,        /* bltgtr_f */
-    24,        /* bltgti_f */
+    36,        /* bltgti_f */
     12,        /* bordr_f */
-    24,        /* bordi_f */
+    36,        /* bordi_f */
     12,        /* bunordr_f */
-    24,        /* bunordi_f */
+    36,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* putargr_d */
     0, /* putargi_d */
     4, /* addr_d */
-    16,        /* addi_d */
+    28,        /* addi_d */
     4, /* subr_d */
-    16,        /* subi_d */
-    16,        /* rsbi_d */
+    28,        /* subi_d */
+    28,        /* rsbi_d */
     4, /* mulr_d */
-    16,        /* muli_d */
+    28,        /* muli_d */
     4, /* divr_d */
-    16,        /* divi_d */
+    28,        /* divi_d */
     4, /* negr_d */
     4, /* absr_d */
     4, /* sqrtr_d */
     16,        /* ltr_d */
-    28,        /* lti_d */
+    44,        /* lti_d */
     16,        /* ler_d */
-    28,        /* lei_d */
+    44,        /* lei_d */
     16,        /* eqr_d */
-    28,        /* eqi_d */
+    44,        /* eqi_d */
     16,        /* ger_d */
-    28,        /* gei_d */
+    44,        /* gei_d */
     16,        /* gtr_d */
-    28,        /* gti_d */
+    44,        /* gti_d */
     16,        /* ner_d */
-    28,        /* nei_d */
+    44,        /* nei_d */
     16,        /* unltr_d */
-    28,        /* unlti_d */
+    44,        /* unlti_d */
     16,        /* unler_d */
-    28,        /* unlei_d */
+    44,        /* unlei_d */
     16,        /* uneqr_d */
-    28,        /* uneqi_d */
+    44,        /* uneqi_d */
     16,        /* unger_d */
-    28,        /* ungei_d */
+    44,        /* ungei_d */
     16,        /* ungtr_d */
-    28,        /* ungti_d */
+    44,        /* ungti_d */
     16,        /* ltgtr_d */
-    28,        /* ltgti_d */
+    44,        /* ltgti_d */
     16,        /* ordr_d */
-    28,        /* ordi_d */
+    44,        /* ordi_d */
     16,        /* unordr_d */
-    28,        /* unordi_d */
+    44,        /* unordi_d */
     8, /* truncr_d_i */
     8, /* truncr_d_l */
     8, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    12,        /* movi_d */
+    28,        /* movi_d */
     4, /* ldr_d */
-    12,        /* ldi_d */
+    24,        /* ldi_d */
     8, /* ldxr_d */
-    4, /* ldxi_d */
+    16,        /* ldxi_d */
     4, /* str_d */
-    12,        /* sti_d */
+    24,        /* sti_d */
     8, /* stxr_d */
-    4, /* stxi_d */
+    16,        /* stxi_d */
     12,        /* bltr_d */
-    24,        /* blti_d */
+    36,        /* blti_d */
     12,        /* bler_d */
-    24,        /* blei_d */
+    36,        /* blei_d */
     12,        /* beqr_d */
-    24,        /* beqi_d */
+    36,        /* beqi_d */
     12,        /* bger_d */
-    24,        /* bgei_d */
+    36,        /* bgei_d */
     12,        /* bgtr_d */
-    24,        /* bgti_d */
+    36,        /* bgti_d */
     12,        /* bner_d */
-    24,        /* bnei_d */
+    40,        /* bnei_d */
     12,        /* bunltr_d */
-    24,        /* bunlti_d */
+    40,        /* bunlti_d */
     12,        /* bunler_d */
-    24,        /* bunlei_d */
+    40,        /* bunlei_d */
     12,        /* buneqr_d */
-    24,        /* buneqi_d */
+    40,        /* buneqi_d */
     12,        /* bunger_d */
-    24,        /* bungei_d */
+    40,        /* bungei_d */
     12,        /* bungtr_d */
-    24,        /* bungti_d */
+    40,        /* bungti_d */
     12,        /* bltgtr_d */
-    24,        /* bltgti_d */
+    36,        /* bltgti_d */
     12,        /* bordr_d */
-    24,        /* bordi_d */
+    36,        /* bordi_d */
     12,        /* bunordr_d */
-    24,        /* bunordi_d */
+    40,        /* bunordi_d */
     0, /* pushargr_d */
     0, /* pushargi_d */
     0, /* retr_d */
     0, /* movr_d_ww */
     0, /* movi_d_ww */
     4, /* movr_d_w */
-    12,        /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    116,       /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
+    24,        /* movi_d_w */
+    4, /* clo */
+    4, /* clz */
+    72,        /* cto */
+    72,        /* ctz */
 #endif /* __WORDSIZE */
index d98d94e..6d56423 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
 #  include <sys/cachectl.h>
 #endif
 
+#if NEW_ABI
+/*   callee save                                   + variadic arguments
+ *   align16(ra+fp+s[0-7]+f20+f22+f24+f26+f28+f30)  + align16(a[0-7]) */
+#  define stack_framesize              (128 + 64)
+#else
+/*   callee save
+ *   align16(ra+fp+s[0-7]+f16+f18+f20+f22+f24+f26+f28+f30) */
+#  define stack_framesize              128
+#endif
+
 #if NEW_ABI
 #  define NUM_WORD_ARGS                        8
 #  define STACK_SLOT                   8
@@ -54,12 +64,14 @@ typedef struct jit_pointer_t jit_va_list_t;
 /*
  * Prototypes
  */
-#define jit_make_arg(node)             _jit_make_arg(_jit,node)
-static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg(node,code)                _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
 #define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
 static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
 #define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
 static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
@@ -67,11 +79,13 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x01,                  "at" },
     { rc(gpr) | 0x02,                  "v0" },
@@ -145,12 +159,49 @@ jit_register_t            _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
 /* Register tables: iregs lists _S0.._S7; fregs lists the even-numbered
  * FPRs _F20.._F30, preceded by _F16 and _F18 when NEW_ABI is not set.
  * NOTE(review): these are the same registers named as "callee save" in the
  * stack_framesize comment; presumably they are walked by the prolog/epilog
  * and _compute_framesize() -- confirm against jit_mips-cpu.c. */
+static jit_int32_t iregs[] = {
+    _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7
+};
+
+static jit_int32_t fregs[] = {
+#if !NEW_ABI
+    _F16, _F18,
+#endif
+    _F20, _F22, _F24, _F26, _F28, _F30
+};
+
 /*
  * Implementation
  */
 void
 jit_get_cpu(void)
 {
+#if defined(__linux__)
+    FILE       *fp;
+    char       *ptr;
+    char        buf[128];
+
+    if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
+       while (fgets(buf, sizeof(buf), fp)) {
+           if (strncmp(buf, "isa                       : ", 8) == 0) {
+               if ((ptr = strstr(buf + 9, "mips64r")))
+                   jit_cpu.release = strtoul(ptr + 7, NULL, 10);
+               break;
+           }
+       }
+       fclose(fp);
+    }
+#endif
+#if __mips_isa_rev
+    if (!jit_cpu.release)
+       jit_cpu.release = __mips_isa_rev;
+#elif defined _MIPS_ARCH
+    if (!jit_cpu.release)
+       jit_cpu.release = strtoul(&_MIPS_ARCH[4], NULL, 10);
+#elif defined(__mips) && __mips < 6
+    if (!jit_cpu.release)
+       jit_cpu.release = __mips;
+#endif
 }
 
 void
@@ -211,6 +262,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -259,20 +311,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 /*
  * Return the value held in register `u`.
  *
  * Opens a synthesized node using the caller-supplied `code`, copies `u`
  * into JIT_RET with jit_movr(), emits jit_ret() and closes the synthesized
  * node.  The removed lines show that the previous version skipped the move
  * when `u` already was JIT_RET and called jit_live(JIT_RET); this version
  * always issues the jit_movr().
  */
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -332,18 +382,18 @@ _jit_epilog(jit_state_t *_jit)
 /*
  * Tell whether the argument described by node `u` satisfies the
  * register-argument test.
  *
  * Integer argument nodes (codes jit_code_arg_c .. jit_code_arg) are tested
  * with jit_arg_reg_p() on the offset stored in u->u.w.  Any other node must
  * be a float or double argument (asserted).  With NEW_ABI such an argument
  * qualifies when either its offset or its offset minus 8 passes
  * jit_arg_reg_p(); otherwise it qualifies when the offset is below 8.
  * NOTE(review): the "offset - 8" form appears to denote a floating-point
  * argument carried in an integer argument register, as the getarg_f/putarg_f
  * code uses _A0 - (offset - 8) for it -- confirm.
  */
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
 #if NEW_ABI
-    return (jit_arg_reg_p(u->u.w));
+    return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 #else
     return (u->u.w < 8);
 #endif
 }
 
 static jit_node_t *
-_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
 {
     jit_int32_t                 offset;
 #if NEW_ABI
@@ -355,13 +405,13 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node)
     }
 #else
     offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
-    _jitc->function->self.argi = 1;
+    ++_jitc->function->self.argi;
     if (offset >= 4)
        offset = _jitc->function->self.size;
     _jitc->function->self.size += STACK_SLOT;
 #endif
     if (node == (jit_node_t *)0)
-       node = jit_new_node(jit_code_arg);
+       node = jit_new_node(code);
     else
        link_node(node);
     node->u.w = offset;
@@ -469,7 +519,6 @@ _jit_ellipsis(jit_state_t *_jit)
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
 #if NEW_ABI
-       /* If varargs start in a register, allocate extra 64 bytes. */
        if (jit_arg_reg_p(_jitc->function->self.argi))
            rewind_prolog();
        /* Do not set during possible rewind. */
@@ -482,6 +531,7 @@ _jit_ellipsis(jit_state_t *_jit)
        _jitc->function->vagp = _jitc->function->self.argi;
     }
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare)
        jit_link_prepare();
     else
@@ -498,10 +548,14 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 /*
  * Declare the next integer argument of the function being built and return
  * its node.
  *
  * `code` is the argument node code to create; when STRONG_TYPE_CHECKING is
  * enabled it is asserted to lie in jit_code_arg_c .. jit_code_arg.  The
  * function must be inside a function definition and must not be called
  * once the function has been flagged jit_call_varargs (both asserted).
  * The node itself is created by jit_make_arg() with a null node pointer.
  */
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     assert(_jitc->function);
-    return (jit_make_arg((jit_node_t*)0));
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+    return (jit_make_arg((jit_node_t*)0, code));
 }
 
 jit_node_t *
@@ -521,55 +575,67 @@ _jit_arg_d(jit_state_t *_jit)
 /*
  * Fetch incoming argument `v` into register `u` using the char variants
  * (jit_extr_c / jit_ldxi_c).
  *
  * If the argument offset passes jit_arg_reg_p() the value is taken from
  * argument register _A0 - v->u.w.  Otherwise it is loaded from
  * _FP + v->u.w + C_DISP; the load node is handed to jit_link_alist() and
  * jit_check_frame() is called.
  * NOTE(review): presumably the link lets the _FP-relative offset be fixed
  * up later by patch_alist() once the frame size is known -- confirm.
  */
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
-    else
-       jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch incoming argument `v` into register `u` using the unsigned char
  * variants (jit_extr_uc / jit_ldxi_uc).
  *
  * Register-passed arguments come from _A0 - v->u.w; stack-passed ones are
  * loaded from _FP + v->u.w + C_DISP, with the load node passed to
  * jit_link_alist() followed by jit_check_frame().
  * NOTE(review): the argument type is checked against jit_code_arg_c, the
  * same code the signed variant uses -- confirm that is intended.
  */
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
-    else
-       jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch incoming argument `v` into register `u` using the short variants
  * (jit_extr_s / jit_ldxi_s).
  *
  * Register-passed arguments come from _A0 - v->u.w; stack-passed ones are
  * loaded from _FP + v->u.w + S_DISP, with the load node passed to
  * jit_link_alist() followed by jit_check_frame().
  */
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
-    else
-       jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch incoming argument `v` into register `u` using the unsigned short
  * variants (jit_extr_us / jit_ldxi_us).
  *
  * Register-passed arguments come from _A0 - v->u.w; stack-passed ones are
  * loaded from _FP + v->u.w + S_DISP, with the load node passed to
  * jit_link_alist() followed by jit_check_frame().
  * NOTE(review): the argument type is checked against jit_code_arg_s, the
  * same code the signed variant uses -- confirm that is intended.
  */
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
-    else
-       jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 64
@@ -578,8 +644,11 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr(u, _A0 - v->u.w);
 #endif
     }
-    else
-       jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
@@ -587,52 +656,64 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 /*
  * Fetch incoming argument `v` into register `u` using the unsigned int
  * variants (jit_extr_ui / jit_ldxi_ui).
  *
  * Register-passed arguments come from _A0 - v->u.w; stack-passed ones are
  * loaded from _FP + v->u.w + I_DISP, with the load node passed to
  * jit_link_alist() followed by jit_check_frame().
  * NOTE(review): the argument type is checked against jit_code_arg_i, the
  * same code the signed variant uses -- confirm that is intended.
  */
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
-    else
-       jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 /*
  * Fetch incoming argument `v` into register `u` as a full word
  * (jit_movr / jit_ldxi_l).
  *
  * Register-passed arguments are copied from _A0 - v->u.w; stack-passed
  * ones are loaded from _FP + v->u.w with no extra displacement, and the
  * load node is passed to jit_link_alist() followed by jit_check_frame().
  */
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
-    else
-       jit_ldxi_l(u, _FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, _FP, v->u.w);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 #endif
 
 /*
  * Overwrite incoming argument `v` with the value of register `u`.
  *
  * `code` is the putargr node code to synthesize; it is checked against the
  * argument node's own code with assert_putarg_type().  Register-passed
  * arguments are written to _A0 - v->u.w.  Stack-passed ones are stored at
  * _FP + v->u.w + WORD_ADJUST; the store node is passed to jit_link_alist()
  * and jit_check_frame() is called.
  * NOTE(review): presumably the link lets the _FP-relative offset be fixed
  * up later by patch_alist() once the frame size is known -- confirm.
  */
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    jit_inc_synth_wp(putargr, u, v);
-    assert(v->code == jit_code_arg);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+    else {
+       jit_node_t      *node = jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w + WORD_ADJUST, _FP, regno);
+       node = jit_stxi(v->u.w + WORD_ADJUST, _FP, regno);
+       jit_link_alist(node);
+       jit_check_frame();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -647,15 +728,18 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(u, _F12 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_w_f(u, _A0 - v->u.w - 8);
+       jit_movr_w_f(u, _A0 - (v->u.w - 8));
 #else
     if (v->u.w < 4)
        jit_movr_w_f(u, _A0 - v->u.w);
     else if (v->u.w < 8)
        jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1));
 #endif
-    else
-       jit_ldxi_f(u, _FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, _FP, v->u.w);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
@@ -668,15 +752,18 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(_F12 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_f_w(_A0 - v->u.w - 8, u);
+       jit_movr_f_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4)
        jit_movr_f_w(_A0 - v->u.w, u);
     else if (v->u.w < 8)
        jit_movr_f(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
-    else
-       jit_stxi_f(v->u.w, _FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, _FP, u);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
@@ -689,12 +776,8 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
 #if NEW_ABI
     if (jit_arg_reg_p(v->u.w))
        jit_movi_f(_F12 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_f(regno, u);
-       jit_movr_f_w(_A0 - v->u.w - 8, u);
-       jit_unget_reg(regno);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4) {
        regno = jit_get_reg(jit_class_fpr);
@@ -706,9 +789,12 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
        jit_movi_f(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, _FP, regno);
+       node = jit_stxi_f(v->u.w, _FP, regno);
+       jit_link_alist(node);
+       jit_check_frame();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -723,15 +809,18 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(u, _F12 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_d_w(_A0 - v->u.w - 8, u);
+       jit_movr_d_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4)
        jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1));
     else if (v->u.w < 8)
        jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1));
 #endif
-    else
-       jit_ldxi_d(u, _FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, _FP, v->u.w);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
@@ -744,15 +833,18 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(_F12 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_d_w(_A0 - v->u.w - 8, u);
+       jit_movr_d_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4)
        jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), u);
     else if (v->u.w < 8)
        jit_movr_d(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
-    else
-       jit_stxi_d(v->u.w, _FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, _FP, u);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
@@ -765,12 +857,8 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 #if NEW_ABI
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(_F12 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_d(regno, u);
-       jit_movr_d_w(_A0 - v->u.w - 8, u);
-       jit_unget_reg(regno);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_d_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4) {
        regno = jit_get_reg(jit_class_fpr);
@@ -782,18 +870,21 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        jit_movi_d(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, _FP, regno);
+       node = jit_stxi_d(v->u.w, _FP, regno);
+       jit_link_alist(node);
+       jit_check_frame();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if NEW_ABI
     assert(_jitc->function);
@@ -802,6 +893,7 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
@@ -809,25 +901,27 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     jit_word_t         offset;
     assert(_jitc->function);
     offset = _jitc->function->call.size >> STACK_SHIFT;
-    _jitc->function->call.argi = 1;
+    ++_jitc->function->call.argi;
     if (jit_arg_reg_p(offset))
        jit_movr(_A0 - offset, u);
-    else
+    else {
+       jit_check_frame();
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
+    }
     _jitc->function->call.size += STACK_SLOT;
 #endif
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                regno;
 #if !NEW_ABI
     jit_word_t         offset;
 #endif
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if NEW_ABI
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -835,6 +929,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, regno);
@@ -847,6 +942,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
     if (jit_arg_reg_p(offset))
        jit_movi(_A0 - offset, u);
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
@@ -875,6 +971,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
@@ -889,8 +986,10 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
        jit_movr_f_w(_A0 - offset, u);
     }
-    else
+    else {
+       jit_check_frame();
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+    }
     _jitc->function->call.size += STACK_SLOT;
 #endif
     jit_dec_synth();
@@ -915,6 +1014,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
@@ -933,6 +1033,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_movi_f_w(_A0 - offset, u);
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
@@ -962,6 +1063,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
@@ -982,8 +1084,10 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
            ++_jitc->function->call.argf;
        }
     }
-    else
+    else {
+       jit_check_frame();
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+    }
     _jitc->function->call.size += sizeof(jit_float64_t);
 #endif
     jit_dec_synth();
@@ -1009,6 +1113,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
@@ -1033,6 +1138,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        }
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
@@ -1070,6 +1176,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *call;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -1090,13 +1197,12 @@ jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *call;
-    jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
-    node = jit_movi(_T9, (jit_word_t)i0);
-    call = jit_callr(_T9);
+    call = jit_calli(i0);
     call->v.w = _jitc->function->call.argi;
 #if NEW_ABI
     call->w.w = call->v.w;
@@ -1107,7 +1213,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
     jit_dec_synth();
-    return (node);
+    return (call);
 }
 
 void
@@ -1182,9 +1288,11 @@ _emit_code(jit_state_t *_jit)
     jit_word_t          word;
     jit_int32_t                 value;
     jit_int32_t                 offset;
+
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1296,18 +1404,30 @@ _emit_code(jit_state_t *_jit)
        prevw = _jit->pc.w;
 #endif
        value = jit_classify(node->code);
+#if GET_JIT_SIZE
+       flush();
+#endif
        jit_regarg_set(node, value);
        switch (node->code) {
            case jit_code_align:
                /* Must align to a power of two */
                assert(!(node->u.w & (node->u.w - 1)));
+               flush();
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
+               flush();
+               break;
+           case jit_code_skip:
+               flush();
+               nop((node->u.w + 3) & ~3);
+               flush();
                break;
            case jit_code_note:         case jit_code_name:
+               flush();
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
+               flush();
                /* remember label is defined */
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
@@ -1461,6 +1581,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1688,6 +1812,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brf(bunord, _d, 64);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1696,16 +1821,24 @@ _emit_code(jit_state_t *_jit)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       jmpi(temp->u.w);
+                       jmpi(temp->u.w, 0);
                    else {
-                       word = jmpi(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_mips2_p() && can_relative_jump_p(word))
+                           word = jmpi(_jit->pc.w, 1);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
-                   jmpi(node->u.w);
+               else {
+                   jit_check_frame();
+                   jmpi(node->u.w, 0);
+               }
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -1713,23 +1846,37 @@ _emit_code(jit_state_t *_jit)
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli_p(temp->u.w);
-                   if (!(temp->flag & jit_flag_patch))
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w, 0);
+                   else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_mips2_p() && can_relative_jump_p(word))
+                           word = calli(_jit->pc.w, 1);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
+                   }
+               }
+               else {
+                   jit_check_frame();
+                   calli(node->u.w, 0);
                }
-               else
-                   calli(node->u.w);
                break;
            case jit_code_prolog:
+               flush();
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
                _jitc->again = 0;
+               compute_framesize();
+               patch_alist(0);
                prolog(node);
                break;
            case jit_code_epilog:
@@ -1744,13 +1891,29 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* this will be recomputed but undo anyway to have it
+                    * better self documented.*/
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
+               flush();
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                epilog(node);
@@ -1798,14 +1961,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1815,10 +1990,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1848,6 +2039,9 @@ _emit_code(jit_state_t *_jit)
                    break;
            }
        }
+#if GET_JIT_SIZE
+       flush();
+#endif
        jit_regarg_clr(node, value);
        assert(_jitc->regarg == 0 ||
               (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
@@ -1855,6 +2049,7 @@ _emit_code(jit_state_t *_jit)
        /* update register live state */
        jit_reglive(node);
     }
+    flush();
 #undef case_brf
 #undef case_brw
 #undef case_brr
@@ -1881,6 +2076,7 @@ _emit_code(jit_state_t *_jit)
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
@@ -1920,6 +2116,29 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void                            /* recompute _jitc->framesize for _jitc->function */
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;            /* index into iregs[] and fregs[] */
+    _jitc->framesize = STACK_SLOT << 1;        /* start with two slots: ra+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)        /* one STACK_SLOT per iregs[] entry set in regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += STACK_SLOT;
+
+    for (reg = 0; reg < jit_size(fregs); reg++)        /* one jit_float64_t per fregs[] entry set in regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+
+#if NEW_ABI
+    /* Space to store variadic arguments: (NUM_WORD_ARGS - vagp) slots */
+    if (_jitc->function->self.call & jit_call_varargs)
+       _jitc->framesize += (NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT;
+#endif
+
+    /* Make sure functions called have a 16 byte aligned stack (round up to a multiple of 16) */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index b663b67..e5985a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -21,17 +21,27 @@ static char *code_name[] = {
     "data",
     "live",            "align",
     "save",            "load",
+    "skip",
     "#name",           "#note",
     "label",
     "prolog",
     "ellipsis",                "va_push",
     "allocai",         "allocar",
-    "arg",
+    "arg_c",
+    "arg_s",
+    "arg_i",
+    "arg_l",
     "getarg_c",                "getarg_uc",
     "getarg_s",                "getarg_us",
     "getarg_i",                "getarg_ui",
     "getarg_l",
-    "putargr",         "putargi",
+    "putargr_c",       "putargi_c",
+    "putargr_uc",      "putargi_uc",
+    "putargr_s",       "putargi_s",
+    "putargr_us",      "putargi_us",
+    "putargr_i",       "putargi_i",
+    "putargr_ui",      "putargi_ui",
+    "putargr_l",       "putargi_l",
     "va_start",
     "va_arg",          "va_arg_d",
     "va_end",
@@ -70,9 +80,12 @@ static char *code_name[] = {
     "ner",             "nei",
     "movr",            "movi",
     "movnr",           "movzr",
+    "casr",            "casi",
     "extr_c",          "extr_uc",
     "extr_s",          "extr_us",
     "extr_i",          "extr_ui",
+    "bswapr_us",
+    "bswapr_ui",       "bswapr_ul",
     "htonr_us",
     "htonr_ui",                "htonr_ul",
     "ldr_c",           "ldi_c",
@@ -120,10 +133,22 @@ static char *code_name[] = {
     "jmpr",            "jmpi",
     "callr",           "calli",
     "prepare",
-    "pushargr",                "pushargi",
+    "pushargr_c",      "pushargi_c",
+    "pushargr_uc",     "pushargi_uc",
+    "pushargr_s",      "pushargi_s",
+    "pushargr_us",     "pushargi_us",
+    "pushargr_i",      "pushargi_i",
+    "pushargr_ui",     "pushargi_ui",
+    "pushargr_l",      "pushargi_l",
     "finishr",         "finishi",
     "ret",
-    "retr",            "reti",
+    "retr_c",          "reti_c",
+    "retr_uc",         "reti_uc",
+    "retr_s",          "reti_s",
+    "retr_us",         "reti_us",
+    "retr_i",          "reti_i",
+    "retr_ui",         "reti_ui",
+    "retr_l",          "reti_l",
     "retval_c",                "retval_uc",
     "retval_s",                "retval_us",
     "retval_i",                "retval_ui",
@@ -228,7 +253,6 @@ static char *code_name[] = {
     "movr_f_w",                "movi_f_w",
     "movr_d_ww",       "movi_d_ww",
     "movr_d_w",                "movi_d_w",
-    "bswapr_us",
-    "bswapr_ui",               "bswapr_ul",
-    "casr",            "casi",
+    "clo",             "clz",
+    "cto",             "ctz",
 };
index f1c149f..b055619 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index f205db0..67874c6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -202,8 +202,21 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
 #  define XCMPLI(cr,l,a,u)             FCI(10,cr,l,a,u)
 #  define CMPLDI(a,s)                  XCMPLI(0,1,a,s)
 #  define CMPLWI(a,s)                  XCMPLI(0,0,a,s)
+#  if __WORDSIZE == 32
+#  define CMPX(a,b)                    CMPW(a,b)
+#  define CMPXI(a,s)                   CMPWI(a,s)
+#  define CMPLX(a,b)                   CMPLW(a,b)
+#  define CMPLXI(a,s)                  CMPLWI(a,s)
+#  else
+#  define CMPX(a,b)                    CMPD(a,b)
+#  define CMPXI(a,s)                   CMPDI(a,s)
+#  define CMPLX(a,b)                   CMPLD(a,b)
+#  define CMPLXI(a,s)                  CMPLDI(a,s)
+#  endif
 #  define CNTLZW(a,s)                  FX(31,s,a,0,26)
 #  define CNTLZW_(a,s)                 FX_(31,s,a,0,26)
+#  define CNTLZD(a,s)                  FX(31,s,a,0,58)
+#  define CNTLZD_(a,s)                 FX_(31,s,a,0,58)
 #  define CRAND(d,a,b)                 FX(19,d,a,b,257)
 #  define CRANDC(d,a,b)                        FX(19,d,a,b,129)
 #  define CREQV(d,a,b)                 FX(19,d,a,b,289)
@@ -520,6 +533,19 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  NOT(r0,r1)
+#  define bitswap(r0, r1)              _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 32
+#    define clzr(r0, r1)               CNTLZW(r0, r1)
+#  else
+#    define clzr(r0, r1)               CNTLZD(r0, r1)
+#  endif
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define extr_c(r0,r1)                        EXTSB(r0,r1)
 #  define extr_uc(r0,r1)               ANDI_(r0,r1,0xff)
 #  define extr_s(r0,r1)                        EXTSH(r0,r1)
@@ -858,14 +884,14 @@ static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused;
 #    define callr(r0,i0)               _callr(_jit,r0,i0)
 static void _callr(jit_state_t*,jit_int32_t,jit_int32_t);
 #    define calli(i0,i1)               _calli(_jit,i0,i1)
-static void _calli(jit_state_t*,jit_word_t,jit_int32_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t);
 #  define calli_p(i0,i1)               _calli_p(_jit,i0,i1)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t);
 #  else
 #    define callr(r0)                  _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #    define calli(i0)                  _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #    define calli_p(i0)                        _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #endif
@@ -1125,7 +1151,7 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPWI(r2, 0);
+    CMPXI(r2, 0);
     BEQ(8);
     MR(r0, r1);
 }
@@ -1133,7 +1159,7 @@ _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 static void
 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPWI(r2, 0);
+    CMPXI(r2, 0);
     BNE(8);
     MR(r0, r1);
 }
@@ -1194,6 +1220,94 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_unget_reg(r1_reg);
 }
 
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int v; // 32-bit word to reverse bit order
+
+// swap odd and even bits
+v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+// swap consecutive pairs
+v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+// swap nibbles ... 
+v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+// swap bytes
+v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+// swap 2-byte long pairs
+v = ( v >> 16             ) | ( v               << 16);
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2, t3, t4;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);              /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  else
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+static void            /* r0 = number of leading one bits in r1 */
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);      /* r0 = ~r1, so leading ones of r1 become leading zeros */
+    clzr(r0, r0);      /* count those leading zeros (CNTLZW or CNTLZD by word size) */
+}
+
+static void            /* r0 = number of trailing one bits in r1 */
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    bitswap(r0, r1);   /* reverse bit order: trailing bits of r1 become leading bits of r0 */
+    clor(r0, r0);      /* leading ones of the reversed word */
+}
+
+static void            /* r0 = number of trailing zero bits in r1 */
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    bitswap(r0, r1);   /* reverse bit order: trailing bits of r1 become leading bits of r0 */
+    clzr(r0, r0);      /* leading zeros of the reversed word */
+}
+
 static void
 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
 {
@@ -1627,7 +1741,7 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_LT);
 }
@@ -1637,11 +1751,11 @@ _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     MFCR(r0);
@@ -1675,7 +1789,7 @@ _lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     CRNOT(CR_GT, CR_GT);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_GT);
@@ -1686,11 +1800,11 @@ _lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     CRNOT(CR_GT, CR_GT);
@@ -1727,7 +1841,7 @@ _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_EQ);
 }
@@ -1737,13 +1851,13 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else if (can_zero_extend_short_p(i0))
        CMPLWI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     MFCR(r0);
@@ -1753,7 +1867,7 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     CRNOT(CR_LT, CR_LT);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_LT);
@@ -1764,11 +1878,11 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     CRNOT(CR_LT, CR_LT);
@@ -1805,7 +1919,7 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_GT);
 }
@@ -1815,11 +1929,11 @@ _gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     MFCR(r0);
@@ -1853,7 +1967,7 @@ _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     CRNOT(CR_EQ, CR_EQ);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_EQ);
@@ -1864,13 +1978,13 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else if (can_zero_extend_short_p(i0))
        CMPLWI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     CRNOT(CR_EQ, CR_EQ);
@@ -1882,7 +1996,7 @@ static jit_word_t
 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BLT(d);
@@ -1895,11 +2009,11 @@ _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -1942,7 +2056,7 @@ static jit_word_t
 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BLE(d);
@@ -1955,11 +2069,11 @@ _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2002,7 +2116,7 @@ static jit_word_t
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BEQ(d);
@@ -2015,13 +2129,13 @@ _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else if (can_zero_extend_short_p(i1))
        CMPLWI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2034,7 +2148,7 @@ static jit_word_t
 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BGE(d);
@@ -2047,11 +2161,11 @@ _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2094,7 +2208,7 @@ static jit_word_t
 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BGT(d);
@@ -2107,11 +2221,11 @@ _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2154,7 +2268,7 @@ static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BNE(d);
@@ -2167,13 +2281,13 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else if (can_zero_extend_short_p(i1))
        CMPLWI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2772,7 +2886,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     if (r1 == _R0_REGNO) {
        if (r2 != _R0_REGNO)
-           LWZX(r0, r2, r1);
+           LWAX(r0, r2, r1);
        else {
            reg = jit_get_reg(jit_class_gpr);
            movr(rn(reg), r1);
@@ -2781,7 +2895,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        }
     }
     else
-       LWZX(r0, r1, r2);
+       LWAX(r0, r1, r2);
 }
 
 static void
@@ -3301,24 +3415,28 @@ _callr(jit_state_t *_jit, jit_int32_t r0
 }
 
 /* assume fixed address or reachable address */
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0
 #  if _CALL_SYSV
        , jit_int32_t varargs
 #  endif
        )
 {
+    jit_word_t         w;
 #  if _CALL_SYSV
     jit_word_t         d;
     d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
     if (can_sign_extend_jump_p(d)) {
-        /* Tell double arguments were passed in registers. */
-        if (varargs)
-            CREQV(6, 6, 6);
-        BL(d);
-    } else
+       /* Tell double arguments were passed in registers. */
+       if (varargs)
+           CREQV(6, 6, 6);
+       w = _jit->pc.w;
+       BL(d);
+    }
+    else
 #  endif
     {
+       w = _jit->pc.w;
        movi(_R12_REGNO, i0);
        callr(_R12_REGNO
 #  if _CALL_SYSV
@@ -3326,6 +3444,7 @@ _calli(jit_state_t *_jit, jit_word_t i0
 #  endif
              );
     }
+    return (w);
 }
 
 /* absolute jump */
@@ -3649,7 +3768,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            if (!can_sign_extend_short_p(d)) {
                /* use absolute address */
                assert(can_sign_extend_short_p(label));
-               d |= 2;
+               d = label | 2;
            }
            u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
            break;
@@ -3677,9 +3796,9 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            if (!can_sign_extend_jump_p(d)) {
                /* use absolute address */
                assert(can_sign_extend_jump_p(label));
-               d |= 2;
+               d = label | 2;
            }
-           u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe);
+           u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd);
            break;
        case 15:                                        /* LI */
 #if __WORDSIZE == 32
index a2edbd8..12631cd 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index 212e637..c8a4471 100644 (file)
@@ -1,22 +1,26 @@
 #if __WORDSIZE == 32
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#if _CALL_SYSV
-#define JIT_INSTR_MAX 124
+#if !_CALL_SYSV
+#define JIT_INSTR_MAX 136
     0, /* data */
     0, /* live */
-    0, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    124,       /* prolog */
+    136,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    36,        /* va_start */
-    52,        /* va_arg */
-    64,        /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     8, /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     16,        /* bxsubi_u */
     8, /* jmpr */
     4, /* jmpi */
-    12,        /* callr */
-    20,        /* calli */
+    28,        /* callr */
+    36,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     36,        /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    24,        /* movi_d */
+    28,        /* movi_d */
     4, /* ldr_d */
     8, /* ldi_d */
     4, /* ldxr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    16,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* _CALL_SYSV */
+    8, /* clo */
+    4, /* clz */
+    136,       /* cto */
+    132,       /* ctz */
+#endif /* !_CALL_SYSV */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #if __WORDSIZE == 32
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#if !_CALL_SYSV
+#if _CALL_SYSV
 #define JIT_INSTR_MAX 136
     0, /* data */
     0, /* live */
-    0, /* align */
+    28,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    136,       /* prolog */
+    124,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    4, /* va_start */
-    8, /* va_arg */
-    8, /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    36,        /* va_start */
+    52,        /* va_arg */
+    64,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     12,        /* remr_u */
     20,        /* remi_u */
     4, /* andr */
-    12,        /* andi */
+    4, /* andi */
     4, /* orr */
     12,        /* ori */
     4, /* xorr */
     16,        /* nei */
     4, /* movr */
     8, /* movi */
-    12,  /* movnr */
-    12,  /* movzr */
+    12,        /* movnr */
+    12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     16,        /* bxsubi_u */
     8, /* jmpr */
     4, /* jmpi */
-    28,        /* callr */
-    40,        /* calli */
+    12,        /* callr */
+    20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     24,        /* unordi_f */
     12,        /* truncr_f_i */
     0, /* truncr_f_l */
-    20,        /* extr_f */
+    36,        /* extr_f */
     4, /* extr_d_f */
     4, /* movr_f */
     12,        /* movi_f */
     32,        /* unordi_d */
     12,        /* truncr_d_i */
     0, /* truncr_d_l */
-    20,        /* extr_d */
+    36,        /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    24,        /* movi_d */
+    28,        /* movi_d */
     4, /* ldr_d */
     8, /* ldi_d */
     4, /* ldxr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    16,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* _CALL_AIX */
-#endif /* __BYTEORDER */
+    8, /* clo */
+    4, /* clz */
+    136,       /* cto */
+    132,       /* ctz */
+#endif /* _CALL_SYSV */
+#endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#define JIT_INSTR_MAX 148
+#define JIT_INSTR_MAX 236
     0, /* data */
     0, /* live */
-    4, /* align */
+    28,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     36,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     28,        /* callr */
     52,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    16,        /* bswapr_ui */
-    44,        /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    236,       /* cto */
+    232,       /* ctz */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #if __WORDSIZE == 64
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-#define JIT_INSTR_MAX 124
+#define JIT_INSTR_MAX 236
     0, /* data */
     0, /* live */
-    4, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     36,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
     8, /* htonr_us */
     16,        /* htonr_ui */
     44,        /* htonr_ul */
     12,        /* callr */
     32,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    16,        /* bswapr_ui */
-    44,        /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    236,       /* cto */
+    232,       /* ctz */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
index 5d2b74b..869e876 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -291,20 +291,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -364,7 +362,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -404,12 +402,16 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     jit_bool_t          incr = 1;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
 #if _CALL_SYSV
@@ -420,7 +422,7 @@ _jit_arg(jit_state_t *_jit)
        offset = _jitc->function->self.size;
     if (incr)
        _jitc->function->self.size += sizeof(jit_word_t);
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -498,7 +500,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
@@ -510,7 +512,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
@@ -522,7 +524,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
@@ -534,7 +536,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
@@ -546,7 +548,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 32
@@ -564,7 +566,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
@@ -576,7 +578,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
@@ -587,10 +589,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
@@ -599,11 +601,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    jit_inc_synth_wp(putargi, u, v);
-    assert(v->code == jit_code_arg);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
@@ -698,11 +700,11 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     jit_bool_t         incr = 1;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
@@ -719,12 +721,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     jit_bool_t          incr = 1;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -1153,6 +1155,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1293,6 +1296,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1368,6 +1374,10 @@ _emit_code(jit_state_t *_jit)
 #  endif
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
            case jit_code_casr:
                casr(rn(node->u.w), rn(node->v.w),
                     rn(node->w.q.l), rn(node->w.q.h));
@@ -1691,7 +1701,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (can_sign_extend_jump_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -1699,36 +1714,45 @@ _emit_code(jit_state_t *_jit)
                    jmpi(node->u.w);
                break;
            case jit_code_callr:
-               callr(rn(node->u.w)
 #if _CALL_SYSV
-                     , !!(node->flag & jit_flag_varargs)
+#  define xcallr(u, v)         callr(u, v)
+#  define xcalli_p(u, v)       calli_p(u, v)
+#  define xcalli(u, v)         calli(u, v)
+#else
+#  define xcallr(u, v)         callr(u)
+#  define xcalli_p(u, v)       calli_p(u)
+#  define xcalli(u, v)         calli(u)
 #endif
-                     );
+               xcallr(rn(node->u.w), !!(node->flag & jit_flag_varargs));
                break;
            case jit_code_calli:
+               value = !!(node->flag & jit_flag_varargs);
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli_p(temp->u.w
+                   if (temp->flag & jit_flag_patch)
+                       xcalli(temp->u.w, value);
+                   else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
 #if _CALL_SYSV
-                                  , !!(node->flag & jit_flag_varargs)
+                       if (can_sign_extend_jump_p(word + value * 4))
+                           word = xcalli(_jit->pc.w, value);
+                       else
 #endif
-                                  );
-                   if (!(temp->flag & jit_flag_patch))
+                           word = xcalli_p(_jit->pc.w, value);
                        patch(word, node);
+                   }
                }
                else
-                   calli(node->u.w
-#if _CALL_SYSV
-                         , !!(node->flag & jit_flag_varargs)
-#endif
-                         );
+                   xcalli(node->u.w, value);
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1772,6 +1796,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1796,14 +1830,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1813,10 +1859,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index a6f9338..f3409fb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
 
 #if __WORDSIZE == 32
 #  define MININT                0x80000000
+#  define DEC_FMT              "%d"
+#  define HEX_FMT              "0x%x"
 #else
 #  define MININT                0x8000000000000000
+#  define DEC_FMT              "%ld"
+#  define HEX_FMT              "0x%lx"
 #endif
 
 
 #define print_hex(value)                                               \
     do {                                                               \
        if (value < 0 && value != MININT)                               \
-           fprintf(print_stream, "-0x%lx", -value);                    \
+           fprintf(print_stream, "-" HEX_FMT, (jit_uword_t)-value);    \
        else                                                            \
-           fprintf(print_stream, "0x%lx", value);                      \
+           fprintf(print_stream, HEX_FMT, (jit_uword_t)value);         \
     } while (0)
-#define print_dec(value)               fprintf(print_stream, "%ld", value)
+#define print_dec(value)               fprintf(print_stream, DEC_FMT, value)
 #define print_flt(value)               fprintf(print_stream, "%g", value)
 #define print_str(value)               fprintf(print_stream, "%s", value)
 #define print_ptr(value)               fprintf(print_stream, "%p", value)
index 89e9491..8da8021 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2015-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -54,13 +54,9 @@ _rewind_prolog(jit_state_t *_jit)
     _jitc->function->self.size = stack_framesize;
 #if __arm__
     assert(jit_cpu.abi);
-    _jitc->function->self.size += 64;
-#endif
-#if __mips__ && NEW_ABI
-    /* Only add extra stack space if there are varargs
-     * arguments in registers. */
-    assert(jit_arg_reg_p(_jitc->function->self.argi));
-    _jitc->function->self.size += 64;
+    _jitc->function->alist = NULL;
+#elif __mips__
+    _jitc->function->alist = NULL;
 #endif
     _jitc->function->self.argi =
        _jitc->function->self.argf = _jitc->function->self.argn = 0;
@@ -71,9 +67,10 @@ _rewind_prolog(jit_state_t *_jit)
     for (; node; node = next) {
        next = node->next;
        switch (node->code) {
-           case jit_code_arg:
+           case jit_code_arg_c:        case jit_code_arg_s:
+           case jit_code_arg_i:        case jit_code_arg_l:
                node->next = (jit_node_t *)0;
-               jit_make_arg(node);
+               jit_make_arg(node, node->code);
                break;
            case jit_code_arg_f:
                node->next = (jit_node_t *)0;
index 2ae11b9..4fd35a8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -93,10 +93,6 @@ typedef union {
 #  undef ui
 } instr_t;
 #  define ii(i)                                *_jit->pc.ui++ = i
-/* FIXME could jit_rewind_prolog() to only use extra 64 bytes
- * if a variadic jit function that have variadic arguments in
- * registers */
-#  define stack_framesize              (200 + 64)
 #  define ldr(r0, r1)                  ldr_l(r0, r1)
 #  define ldi(r0, im)                  ldi_l(r0, im)
 #  define ldxr(r0, r1, r2)             ldxr_l(r0, r1, r2)
@@ -579,12 +575,12 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     JALR(_ZERO_REGNO, r0, 0)
 #  define jmpi(im)                     _jmpi(_jit, im)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #  define jmpi_p(im)                   _jmpi_p(_jit, im)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    JALR(_RA_REGNO, r0, 0)
 #  define calli(im)                    _calli(_jit, im)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #  define calli_p(im)          _calli_p(_jit, im)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
@@ -2087,12 +2083,13 @@ _bmci(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     jit_int32_t                t0;
-    jit_word_t         dsp;
-    dsp = i0 - _jit->pc.w;
+    jit_word_t         dsp, w;
+    w = _jit->pc.w;
+    dsp = i0 - w;
     if (simm20_p(dsp))
        JAL(_ZERO_REGNO, dsp);
     else {
@@ -2101,6 +2098,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
        jmpr(rn(t0));
        jit_unget_reg(t0);
     }
+    return (w);
 }
 
 static jit_word_t
@@ -2115,12 +2113,13 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
     jit_int32_t                t0;
-    jit_word_t         dsp;
-    dsp = i0 - _jit->pc.w;
+    jit_word_t         dsp, w;
+    w = _jit->pc.w;
+    dsp = i0 - w;
     if (simm20_p(dsp))
        JAL(_RA_REGNO, dsp);
     else {
@@ -2129,6 +2128,7 @@ _calli(jit_state_t *_jit, jit_word_t i0)
        callr(rn(t0));
        jit_unget_reg(t0);
     }
+    return (w);
 }
 
 static jit_word_t
@@ -2146,9 +2146,10 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -2159,56 +2160,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
-    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
-    stxi(0, _SP_REGNO, _RA_REGNO);
-    stxi(8, _SP_REGNO, _FP_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       stxi(16, _SP_REGNO, 9);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       stxi(24, _SP_REGNO, 18);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       stxi(32, _SP_REGNO, 19);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       stxi(40, _SP_REGNO, 20);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       stxi(48, _SP_REGNO, 21);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       stxi(56, _SP_REGNO, 22);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       stxi(64, _SP_REGNO, 23);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       stxi(72, _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S9))
-       stxi(80, _SP_REGNO, 25);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S10))
-       stxi(88, _SP_REGNO, 26);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S11))
-       stxi(96, _SP_REGNO, 27);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       stxi_d(104, _SP_REGNO, 8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       stxi_d(112, _SP_REGNO, 9);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       stxi_d(120, _SP_REGNO, 18);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       stxi_d(128, _SP_REGNO, 19);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       stxi_d(136, _SP_REGNO, 20);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       stxi_d(144, _SP_REGNO, 21);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       stxi_d(152, _SP_REGNO, 22);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       stxi_d(160, _SP_REGNO, 23);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
-       stxi_d(168, _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
-       stxi_d(176, _SP_REGNO, 25);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
-       stxi_d(184, _SP_REGNO, 26);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
-       stxi_d(192, _SP_REGNO, 27);
-    movr(_FP_REGNO, _SP_REGNO);
+
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+    if (_jitc->function->need_frame) {
+       stxi(0, _SP_REGNO, _RA_REGNO);
+       stxi(8, _SP_REGNO, _FP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _SP_REGNO, rn(iregs[reg]));
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
@@ -2219,7 +2205,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     }
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
-           stxi(stack_framesize - ((8 - reg) * 8),
+           stxi(jit_framesize() - ((8 - reg) * 8),
                 _FP_REGNO, rn(JIT_RA0 - reg));
     }
 }
@@ -2227,58 +2213,31 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
-    movr(_SP_REGNO, _FP_REGNO);
-    ldxi(_RA_REGNO, _SP_REGNO, 0);
-    ldxi(_FP_REGNO, _SP_REGNO, 8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       ldxi(9, _SP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       ldxi(18, _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       ldxi(19, _SP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       ldxi(20, _SP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       ldxi(21, _SP_REGNO, 48);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       ldxi(22, _SP_REGNO, 56);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       ldxi(23, _SP_REGNO, 64);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       ldxi(24, _SP_REGNO, 72);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S9))
-       ldxi(25, _SP_REGNO, 80);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S10))
-       ldxi(26, _SP_REGNO, 88);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S11))
-       ldxi(27, _SP_REGNO, 96);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       ldxi_d(8, _SP_REGNO, 104);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       ldxi_d(9, _SP_REGNO, 112);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       ldxi_d(18, _SP_REGNO, 120);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       ldxi_d(19, _SP_REGNO, 128);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       ldxi_d(20, _SP_REGNO, 136);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       ldxi_d(21, _SP_REGNO, 144);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       ldxi_d(22, _SP_REGNO, 152);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       ldxi_d(23, _SP_REGNO, 160);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
-       ldxi_d(24, _SP_REGNO, 168);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
-       ldxi_d(25, _SP_REGNO, 176);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
-       ldxi_d(26, _SP_REGNO, 184);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
-       ldxi_d(27, _SP_REGNO, 192);
-    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    if (_jitc->function->need_frame) {
+       movr(_SP_REGNO, _FP_REGNO);
+       ldxi(_RA_REGNO, _SP_REGNO, 0);
+       ldxi(_FP_REGNO, _SP_REGNO, 8);
+    }
+
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       addi(_SP_REGNO, _SP_REGNO, jit_framesize());
     RET();
 }
 
@@ -2288,9 +2247,9 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     assert(_jitc->function->self.call & jit_call_varargs);
     /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
-       addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+       addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
     else
-       addi(r0, _FP_REGNO, _jitc->function->self.size);
+       addi(r0, _FP_REGNO, jit_selfsize());
 }
 
 static void
@@ -2333,7 +2292,6 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        }
        else
            abort();
-       i.w = u.i[1];
        assert(i.I.opcode == 3 && i.I.funct3 == 3);             /* LD */
     }
 #  else
index e7884cb..89346e0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index 8c4cf04..335d3cf 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 168
     0, /* data */
     0, /* live */
     4, /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     12,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    28,        /* casr */
+    40,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     4, /* extr_i */
     8, /* extr_ui */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
     20,        /* htonr_us */
     52,        /* htonr_ui */
     116,       /* htonr_ul */
     4, /* callr */
     16,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* movr_w_f */
     0, /* movr_ww_d */
     4, /* movr_w_d */
-    0, /* movr_f_w */
+    4, /* movr_f_w */
     4, /* movi_f_w */
     0, /* movr_d_ww */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    116,       /* bswapr_ul */
-    28,        /* casr */
-    40,        /* casi */
+    168,       /* clo */
+    148,       /* clz */
+    168,       /* cto */
+    148,       /* ctz */
 #endif /* __WORDSIZE */
index 8828d4a..63a5cd9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* callee save                                   + variadic arguments
+ * align16(ra+fp+s[1-9]+s10+s11+fs[0-9]+fs10+fs11)+align16(a[0-7]) */
+#define stack_framesize                        (208 + 64)
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
@@ -28,6 +32,8 @@ typedef jit_pointer_t jit_va_list_t;
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #if __WORDSIZE == 64
 #  define load_const(r0, i0)           _load_const(_jit, r0, i0)
 static void _load_const(jit_state_t*, jit_int32_t, jit_word_t);
@@ -43,6 +49,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
@@ -119,6 +126,14 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10, _S11
+};
+
+static jit_int32_t fregs[] = {
+    _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7, _FS8, _FS9, _FS10, _FS11
+};
+
 /*
  * Implementation
  */
@@ -180,6 +195,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -228,20 +244,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -301,16 +315,17 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_f_reg_p(u->u.w));
+    return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 }
 
 void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
@@ -334,19 +349,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -368,6 +387,7 @@ _jit_arg_f(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -391,6 +411,7 @@ _jit_arg_d(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -401,111 +422,129 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+       node = jit_stxi(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -520,8 +559,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, JIT_FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -534,8 +575,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(JIT_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -547,18 +590,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(JIT_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float32_t       f;
-           jit_int32_t         i;
-       } uu;
-       uu.f = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.i);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
+       node = jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -573,8 +612,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(u, JIT_FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -587,8 +628,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(JIT_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -600,28 +643,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(JIT_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float64_t       d;
-           jit_int64_t         w;
-       } uu;
-       uu.d = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
+       node = jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
@@ -630,16 +669,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -651,6 +691,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -673,6 +714,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -699,6 +741,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -721,6 +764,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -747,6 +791,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -775,6 +820,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -792,6 +838,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -892,6 +939,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1066,6 +1114,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1113,6 +1164,14 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+#define clor(r0, r1)   fallback_clo(r0, r1)
+#define clzr(r0, r1)   fallback_clz(r0, r1)
+#define ctor(r0, r1)   fallback_cto(r0, r1)
+#define ctzr(r0, r1)   fallback_ctz(r0, r1)
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1434,6 +1493,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1444,14 +1504,22 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (simm20_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
                        word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -1462,22 +1530,32 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (simm20_p(word))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    calli(node->u.w);
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
@@ -1493,10 +1571,25 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* this will be recomputed but undo anyway to have it
+                    * better self documented.*/
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
@@ -1537,11 +1630,19 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+           case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1549,10 +1650,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1659,6 +1772,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 static void
@@ -1806,6 +1920,30 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+#if __WORDSIZE != 64
+# error "only 64 bit ports tested"
+#endif
+static void
+_compute_framesize(jit_state_t *_jit)
+{   /* Recompute _jitc->framesize for _jitc->function: a fixed 16 bytes,
+     * plus one slot for every iregs[]/fregs[] entry that is set in the
+     * function's regset, plus the variadic save area when the function
+     * is variadic, all rounded up to a multiple of 16. */
+    jit_int32_t                reg;
+    _jitc->framesize = 16;     /* ra+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)        /* one word each */
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    for (reg = 0; reg < jit_size(fregs); reg++)        /* one double each */
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+
+    /* Space to store variadic arguments: 8 bytes for each of the
+     * (8 - vagp) remaining slots */
+    if (_jitc->function->self.call & jit_call_varargs)
+       _jitc->framesize += (8 - _jitc->function->vagp) * 8;
+
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index 55b7e1f..2e9e074 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -394,6 +394,8 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define EAR(R1,R2)                   RRE_(0xB24F,R1,R2)
 /* EXTRACT PSW */
 #  define EPSW(R1,R2)                  RRE_(0xB98D,R1,R2)
+/* FIND LEFTMOST ONE */
+#  define FLOGR(R1,R2)                 RRE_(0xB983,R1,R2)
 /* INSERT CHARACTER */
 #  define IC(R1,D2,X2,B2)              RX_(0x43,R1,X2,B2,D2)
 #  define ICY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x73)
@@ -966,9 +968,14 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-#  define bswapr_us(r0, r1)            generic_bswapr_us(_jit, r0, r1)
-#  define bswapr_ui(r0, r1)            generic_bswapr_ui(_jit, r0, r1)
-#  define bswapr_ul(r0, r1)            generic_bswapr_ul(_jit, r0, r1)
+#  define bswapr_us(r0, r1)            _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0, r1)            _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#define bswapr_ul(r0, r1)              _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#endif
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
@@ -1051,32 +1058,39 @@ static void _qdivi_u(jit_state_t*,jit_int32_t,
 #  if __WORDSIZE == 32
 #    define lshr(r0,r1,r2)             _lshr(_jit,r0,r1,r2)
 static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#  else
-#    define lshr(r0,r1,r2)             SLLG(r0,r1,0,r2)
-#  endif
-#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+#    define lshi(r0,r1,i0)             _lshi(_jit,r0,r1,i0)
 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  if __WORDSIZE == 32
 #    define rshr(r0,r1,r2)             _rshr(_jit,r0,r1,r2)
 static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#  else
-#    define rshr(r0,r1,r2)             SRAG(r0,r1,0,r2)
-#  endif
-#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+#    define rshi(r0,r1,i0)             _rshi(_jit,r0,r1,i0)
 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  if __WORDSIZE == 32
 #    define rshr_u(r0,r1,r2)           _rshr_u(_jit,r0,r1,r2)
 static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  else
+#    define lshr(r0,r1,r2)             SLLG(r0,r1,0,r2)
+#    define lshi(r0,r1,i0)             SLLG(r0,r1,i0,0)
+#    define rshr(r0,r1,r2)             SRAG(r0,r1,0,r2)
+#    define rshi(r0,r1,i0)             SRAG(r0,r1,i0,0)
 #    define rshr_u(r0,r1,r2)           SRLG(r0,r1,0,r2)
+#    define rshi_u(r0,r1,i0)           SRLG(r0,r1,i0,0)
 #  endif
-#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
-static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  if __WORDSIZE == 32
 #    define negr(r0,r1)                        LCR(r0,r1)
 #  else
 #    define negr(r0,r1)                        LCGR(r0,r1)
 #  endif
+#  define bitswap(r0, r1)              _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define comr(r0,r1)                  _comr(_jit,r0,r1)
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2)
@@ -1289,13 +1303,13 @@ static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define bmci(i0,r0,i1)               bmxi(CC_E,i0,r0,i1)
 #  define bmci_p(i0,r0,i1)             bmxi_p(CC_E,i0,r0,i1)
 #  define jmpr(r0)                     BR(r0)
-#  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi(i0,i1)                  _jmpi(_jit,i0,i1)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t, jit_bool_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    BALR(_R14_REGNO,r0)
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+#  define calli(i0,i1)                 _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t, jit_bool_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
@@ -2473,6 +2487,31 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Backs the bswapr_us(r0, r1) macro; emits three instructions.
+     * Reading of the mnemonics (z/Architecture, confirm there): LRVR
+     * byte-reverses the low 32 bits, leaving the swapped halfword in the
+     * upper half of that word; SRL by 16 moves it back down; LLGHR
+     * zero-extends the resulting halfword. */
+    LRVR(r0, r1);
+    SRL(r0, 16, 0);
+    LLGHR(r0, r0);
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Backs the bswapr_ui(r0, r1) macro: LRVR from r1 into r0, and on
+     * 64-bit builds an additional LLGFR of r0 onto itself (presumably to
+     * zero-extend the 32-bit result -- confirm against the ISA manual). */
+    LRVR(r0, r1);
+#  if __WORDSIZE == 64
+    LLGFR(r0, r0);
+#  endif
+}
+
+#if __WORDSIZE == 64
+static void
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Backs the bswapr_ul(r0, r1) macro, which only exists on 64-bit
+     * builds: a single LRVGR from r1 into r0. */
+    LRVGR(r0, r1);
+}
+#endif
+
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
@@ -2897,19 +2936,14 @@ _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        SLL(r0, 0, r2);
     }
 }
-#endif
 
 static void
 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg_but_zero(0);
-    movi(rn(reg), i0);
-    lshr(r0, r1, rn(reg));
-    jit_unget_reg_but_zero(reg);
+    movr(r0, r1);
+    SLL(r0, i0, 0);
 }
 
-#  if __WORDSIZE == 32
 static void
 _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2926,19 +2960,14 @@ _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        SRA(r0, 0, r2);
     }
 }
-#endif
 
 static void
 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg_but_zero(0);
-    movi(rn(reg), i0);
-    rshr(r0, r1, rn(reg));
-    jit_unget_reg_but_zero(reg);
+    movr(r0, r1);
+    SRA(r0, i0, 0);
 }
 
-#  if __WORDSIZE == 32
 static void
 _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2955,16 +2984,141 @@ _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        SRL(r0, 0, r2);
     }
 }
-#endif
 
 static void
 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg_but_zero(0);
-    movi(rn(reg), i0);
-    rshr_u(r0, r1, rn(reg));
-    jit_unget_reg_but_zero(reg);
+    movr(r0, r1);
+    SRL(r0, i0, 0);
+}
+#endif
+
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Emit code that leaves in r0 the value of r1 with its bit order
+     * reversed.  r1 is first copied to r0 (v below), then v is rebuilt
+     * in place by swapping neighbouring groups of 1, 2, 4 and 8 bits
+     * under a mask held in t0, then 16-bit groups, and on 64-bit builds
+     * finally the two 32-bit halves.  Three scratch registers are taken
+     * and released before returning. */
+    jit_int32_t                t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    /* last step on 32 bit: no mask needed, the shifts drop the rest */
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);              /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  else
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* last step on 64 bit: no mask needed, the shifts drop the rest */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Count leading ones of r1 into r0, done as clzr() of the
+     * complement of r1.  When CHECK_FLOGR is enabled the choice is made
+     * when code is emitted: jit_cpu.flogr selects this sequence,
+     * otherwise fallback_clo() is used. */
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+       comr(r0, r1);
+       clzr(r0, r0);
+#if CHECK_FLOGR
+    }
+    else
+       fallback_clo(r0, r1);
+#endif
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Count leading zeros of r1 into r0 using FLOGR on a temporary
+     * register pair (presumably because FLOGR writes an even/odd pair --
+     * confirm against the ISA manual).  When CHECK_FLOGR is enabled and
+     * jit_cpu.flogr is clear, fallback_clz() is emitted instead. */
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+#if __WORDSIZE == 32
+       jit_word_t              w;
+#endif
+       jit_int32_t             regno;
+       regno = jit_get_reg_pair();
+#if __WORDSIZE == 32
+       SLLG(rn(regno), r1, 32, 0);     /* operand moved up by 32 bits */
+#else
+       movr(rn(regno), r1);
+#endif
+       FLOGR(rn(regno), rn(regno));
+       movr(r0, rn(regno));
+#if __WORDSIZE == 32
+       w = blei_p(_jit->pc.w, r0, 31); /* skip the fixup if r0 <= 31 */
+       rshi(r0, r0, 1);        /* r0 is 64, halve it */
+       patch_at(w, _jit->pc.w);
+#endif
+       jit_unget_reg_pair(regno);
+#if CHECK_FLOGR
+    }
+    else
+       fallback_clz(r0, r1);
+#endif
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Count trailing ones of r1 into r0: reverse the bit order with
+     * bitswap() and count leading ones of the result.  When CHECK_FLOGR
+     * is enabled and jit_cpu.flogr is clear, fallback_cto() is emitted
+     * instead. */
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+       bitswap(r0, r1);
+       clor(r0, r0);
+#if CHECK_FLOGR
+    }
+    else
+       fallback_cto(r0, r1);
+#endif
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{   /* Count trailing zeros of r1 into r0: reverse the bit order with
+     * bitswap() and count leading zeros of the result.  When CHECK_FLOGR
+     * is enabled and jit_cpu.flogr is clear, fallback_ctz() is emitted
+     * instead. */
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+       bitswap(r0, r1);
+       clzr(r0, r0);
+#if CHECK_FLOGR
+    }
+    else
+       fallback_ctz(r0, r1);
+#endif
 }
 
 static void
@@ -3497,13 +3651,14 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
-static void
-_jmpi(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
 {
-    jit_word_t         d;
     jit_int32_t                reg;
-    d = (i0 - _jit->pc.w) >> 1;
-    if (s16_p(d))
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 1;
+    if (i1 && s16_p(d))
        J(x16(d));
     else if (s32_p(d))
        BRL(d);
@@ -3513,6 +3668,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
        jmpr(rn(reg));
        jit_unget_reg_but_zero(reg);
     }
+    return (w);
 }
 
 static jit_word_t
@@ -3527,13 +3683,16 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
 {
-    jit_word_t         d;
     jit_int32_t                reg;
-    d = (i0 - _jit->pc.w) >> 1;
-    if (s32_p(d))
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 1;
+    if (i1 && s16_p(d))
+       BRAS(_R14_REGNO, x16(d));
+    else if (s32_p(d))
        BRASL(_R14_REGNO, d);
     else {
        reg = jit_get_reg_but_zero(0);
@@ -3541,6 +3700,7 @@ _calli(jit_state_t *_jit, jit_word_t i0)
        callr(rn(reg));
        jit_unget_reg_but_zero(reg);
     }
+    return (w);
 }
 
 static jit_word_t
@@ -3889,17 +4049,17 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        u.s[7] = i1.s;
 #endif
     }
-    /* BRC */
+    /* BRC or BRAS */
     else if (i0.b.op == 0xA7) {
-       assert(i0.b.r3 == 0x4);
+       assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
        d = (label - instr) >> 1;
        assert(s16_p(d));
        i1.b.i2 = d;
        u.s[1] = i1.s;
     }
-    /* BRCL */
+    /* BRCL or BRASL */
     else if (i0.b.op == 0xC0) {
-       assert(i0.b.r3 == 0x4);
+       assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
        d = (label - instr) >> 1;
        assert(s32_p(d));
        i12.i = d;
index edf9ddd..6c3c4ac 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
index e70c65f..ee30447 100644 (file)
@@ -1,11 +1,11 @@
-
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 94
+#define JIT_INSTR_MAX 164
     0, /* data */
     0, /* live */
-    2, /* align */
+    4, /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     2, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     40,        /* va_start */
-    86,        /* va_arg */
-    82,        /* va_arg_d */
+    82,        /* va_arg */
+    78,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
@@ -43,8 +58,8 @@
     14,        /* rsbi */
     6, /* mulr */
     14,        /* muli */
-    46,        /* qmulr */
-    50,        /* qmuli */
+    38,        /* qmulr */
+    42,        /* qmuli */
     10,        /* qmulr_u */
     18,        /* qmuli_u */
     10,        /* divr */
     4, /* xorr */
     12,        /* xori */
     8, /* lshr */
-    10,        /* lshi */
+    6, /* lshi */
     8, /* rshr */
-    10,        /* rshi */
+    6, /* rshi */
     8, /* rshr_u */
-    10,        /* rshi_u */
+    6, /* rshi_u */
     2, /* negr */
     8, /* comr */
     16,        /* ltr */
     8, /* movi */
     14,        /* movnr */
     14,        /* movzr */
+    22,        /* casr */
+    28,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    12,        /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     2, /* htonr_ui */
     0, /* htonr_ul */
     8, /* bxsubr_u */
     12,        /* bxsubi_u */
     2, /* jmpr */
-    10,        /* jmpi */
+    6, /* jmpi */
     2, /* callr */
-    10,        /* calli */
+    6, /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    38,        /* bswapr_us */
-    94,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    22,        /* casr */
-    28,        /* casi */
+    36,        /* clo */
+    28,        /* clz */
+    164,       /* cto */
+    158,       /* ctz */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 300
+#define JIT_INSTR_MAX 280
     0, /* data */
     0, /* live */
-    6, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     2, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     44,        /* va_start */
-    104,       /* va_arg */
-    100,       /* va_arg_d */
+    100,       /* va_arg */
+    96,        /* va_arg_d */
     0, /* va_end */
     8, /* addr */
     24,        /* addi */
     28,        /* rsbi */
     8, /* mulr */
     24,        /* muli */
-    60,        /* qmulr */
-    68,        /* qmuli */
+    52,        /* qmulr */
+    60,        /* qmuli */
     16,        /* qmulr_u */
     32,        /* qmuli_u */
     12,        /* divr */
     8, /* xorr */
     24,        /* xori */
     6, /* lshr */
-    10,        /* lshi */
+    6, /* lshi */
     6, /* rshr */
-    10,        /* rshi */
+    6, /* rshi */
     6, /* rshr_u */
-    10,        /* rshi_u */
+    6, /* rshi_u */
     4, /* negr */
     12,        /* comr */
     20,        /* ltr */
     16,        /* movi */
     18,        /* movnr */
     18,        /* movzr */
+    30,        /* casr */
+    42,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    12,        /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     6, /* ldr_c */
     18,        /* ldi_c */
     6, /* ldr_uc */
-    18,        /* ldi_uc */
+    22,        /* ldi_uc */
     6, /* ldr_s */
     18,        /* ldi_s */
     6, /* ldr_us */
     14,        /* ldxr_l */
     26,        /* ldxi_l */
     4, /* str_c */
-    16,        /* sti_c */
+    20,        /* sti_c */
     4, /* str_s */
     16,        /* sti_s */
     4, /* str_i */
     10,        /* bxsubr_u */
     14,        /* bxsubi_u */
     2, /* jmpr */
-    18,        /* jmpi */
+    6, /* jmpi */
     2, /* callr */
-    18,        /* calli */
+    14,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    60,        /* bswapr_us */
-    140,       /* bswapr_ui */
-    300,       /* bswapr_ul */
-    30,        /* casr */
-    42,        /* casi */
+    24,        /* clo */
+    12,        /* clz */
+    280,       /* cto */
+    272,       /* ctz */
 #endif /* __WORDSIZE */
index 30ab760..6934b11 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  * Authors:
  *     Paulo Cesar Pereira de Andrade
  */
+#define CHECK_FLOGR    0
+
+#if CHECK_FLOGR
+#include <signal.h>
+#include <setjmp.h>
+#endif
 
 #include <lightning.h>
 #include <lightning/jit_private.h>
@@ -88,11 +94,15 @@ extern void __clear_cache(void *, void *);
 #define PROTO                          1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
+#  if CHECK_FLOGR
+#    include "jit_fallback.c"
+#  endif
 #undef PROTO
 
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x0,                   "%r0" },
     { rc(gpr) | 0x1,                   "%r1" },
@@ -129,13 +139,48 @@ jit_register_t            _rvs[] = {
     { rc(fpr) | rc(arg) | 0x0,         "%f0" },
     { _NOREG,                          "<none>" },
 };
+#if CHECK_FLOGR
+static sigjmp_buf      jit_env;
+#endif
 
 /*
  * Implementation
  */
+#if CHECK_FLOGR
+static void
+sigill_handler(int signum)
+{   /* SIGILL handler used while jit_get_cpu() probes for FLOGR: record
+     * that the instruction is unavailable and resume at the sigsetjmp()
+     * on jit_env with a return value of 1.  signum is not used. */
+    jit_cpu.flogr = 0;
+    siglongjmp(jit_env, 1);
+}
+#endif
+
 void
 jit_get_cpu(void)
 {
+#if CHECK_FLOGR
+    int                        r12, r13;
+    struct             sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    if (old_action.sa_handler != SIG_IGN) {
+       sigaction(SIGILL, &new_action, NULL);
+       if (!sigsetjmp(jit_env, 1)) {
+           jit_cpu.flogr = 1;
+           /* flogr %r12, %r12 */
+           __asm__ volatile("lgr %%r12, %0; lgr %%r13, %1;"
+                            "flogr %%r12, %%r12;"
+                            "lgr %1, %%r13; lgr %0, %%r12;"
+                            : "=r" (r12), "=r" (r13));
+           sigaction(SIGILL, &old_action, NULL);
+       }
+    }
+#else
+    /* By default, assume it is available */
+    jit_cpu.flogr = 1;
+#endif
 }
 
 void
@@ -240,18 +285,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
+    jit_code_inc_synth_w(code, u);
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -305,7 +350,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -352,18 +397,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -408,7 +457,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _R2 - v->u.w);
@@ -421,7 +470,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _R2 - v->u.w);
@@ -434,7 +483,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _R2 - v->u.w);
@@ -447,7 +496,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _R2 - v->u.w);
@@ -460,7 +509,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 32
@@ -479,7 +528,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _R2 - v->u.w);
@@ -492,7 +541,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _R2 - v->u.w);
@@ -503,10 +552,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_R2 - v->u.w, u);
     else
@@ -515,11 +564,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_R2 - v->u.w, u);
     else {
@@ -627,10 +676,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_R2 - _jitc->function->call.argi, u);
@@ -644,11 +693,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_R2 - _jitc->function->call.argi, u);
@@ -890,6 +939,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1032,6 +1082,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 1) & ~1);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1082,6 +1135,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1427,14 +1484,21 @@ _emit_code(jit_state_t *_jit)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       jmpi(temp->u.w);
+                       jmpi(temp->u.w, 1);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s32_p(word)) {
+                           offset = s16_p(word);
+                           word = jmpi(_jit->pc.w, offset);
+                       }
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                else
-                   jmpi(node->u.w);
+                   jmpi(node->u.w, 1);
                break;
            case jit_code_callr:
                callr(rn(node->u.w));
@@ -1445,19 +1509,27 @@ _emit_code(jit_state_t *_jit)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       calli(temp->u.w);
+                       calli(temp->u.w, 1);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s32_p(word)) {
+                           offset =s16_p(word);
+                           word = calli(_jit->pc.w, offset);
+                       }
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                else
-                   calli(node->u.w);
+                   calli(node->u.w, 1);
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1478,6 +1550,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1504,11 +1586,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1518,10 +1612,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1565,6 +1675,9 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
+#  if CHECK_FLOGR
+#    include "jit_fallback.c"
+#  endif
 #undef CODE
 
 void
index b3e1cae..143a5d9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -28,7 +28,7 @@
  */
 static jit_int16_t     _szs[jit_code_last_code] = {
 #if GET_JIT_SIZE
-#  define JIT_INSTR_MAX                512
+#  define JIT_INSTR_MAX                1024
 #else
 #  if defined(__i386__) || defined(__x86_64__)
 #    include "jit_x86-sz.c"
@@ -121,7 +121,15 @@ _jit_get_size(jit_state_t *_jit)
                break;
        }
 #  endif
-       size += _szs[node->code];
+       switch (node->code) {
+           /* These instructions are special because they can be arbitrarily long.  */
+           case jit_code_align:
+           case jit_code_skip:
+               size += node->u.w;
+               break;
+           default:
+               size += _szs[node->code];
+       }
     }
 #  if __riscv && __WORDSIZE == 64
     /* Heuristically only 20% of constants are unique. */
@@ -143,7 +151,7 @@ jit_finish_size(void)
 {
 #if GET_JIT_SIZE
     FILE               *fp;
-    jit_word_t          offset;
+    int                         offset;
 
     /* Define a single path */
     fp = fopen(JIT_SIZE_PATH, "a");
index 86eb05e..f4ce621 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -120,6 +120,11 @@ static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
 static void _f3a(jit_state_t*,jit_int32_t,
                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
     maybe_unused;
+#  define f2c1(op,rd,op3,rs1,opf,rs2)  _f2c1(_jit,op,rd,op3,rs1,opf,rs2)
+static void
+_f2c1(jit_state_t*,jit_int32_t, jit_int32_t,
+      jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+    maybe_unused;
 #  define LDSB(rs1, rs2, rd)           f3r(3, rd, 9, rs1, rs2)
 #  define LDSBI(rs1, imm, rd)          f3i(3, rd, 9, rs1, imm)
 #  define LDSH(rs1, rs2, rd)           f3r(3, rd, 10, rs1, rs2)
@@ -545,6 +550,7 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define UNIMP(imm)                   f2r(0, 0, 0, imm)
 #  define FLUSH(rs1, rs2)              f3r(2, 0, 59, rs1, rs2)
 #  define FLUSHI(rs1, im)              f3i(2, 0, 59, rs1, imm)
+#  define LZCNT(rs2, rd)               f2c1(2, rd, 54, 0, 23, rs2)
 #  define nop(i0)                      _nop(_jit, i0)
 static void _nop(jit_state_t*, jit_int32_t);
 #  define movr(r0, r1)                 _movr(_jit, r0, r1)
@@ -567,6 +573,16 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define comr(r0, r1)                 XNOR(r1, 0, r0)
 #  define negr(r0, r1)                 NEG(r1, r0)
+#  define bitswap(r0, r1)              _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define addr(r0, r1, r2)             ADD(r1, r2, r0)
 #  define addi(r0, r1, i0)             _addi(_jit, r0, r1, i0)
 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
@@ -941,13 +957,13 @@ _bm_w(jit_state_t*,jit_bool_t,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     _jmpr(_jit, r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #  define jmpi(i0)                     _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    _callr(_jit, r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #  define calli(i0)                    _calli(_jit, i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #  define calli_p(i0)                  _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit, node)
@@ -1182,6 +1198,26 @@ _f1(jit_state_t *_jit, jit_int32_t op, jit_int32_t disp30)
     ii(v.v);
 }
 
+/*
+ * Emit one instruction word assembled from the fields op, rd, op3, rs1,
+ * opf and rs2 (the encoding used by the LZCNT macro).  Every field is
+ * range-checked before it is stored in the matching jit_instr_t
+ * bit-field.
+ */
+static void
+_f2c1(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));       /* 2 bits */
+    assert(!(rd  & 0xffffffe0));       /* 5 bits */
+    assert(!(op3 & 0xffffffc0));       /* 6 bits */
+    assert(!(rs1 & 0xffffffe0));       /* 5 bits */
+    assert(!(opf & 0xfffffe00));       /* 9 bits */
+    assert(!(rs2 & 0xffffffe0));       /* 5 bits, a register like rd/rs1 */
+    v.op.b = op;
+    v.rd.b = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.opf.b = opf;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
 static void
 _nop(jit_state_t *_jit, jit_int32_t i0)
 {
@@ -1296,6 +1332,111 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
         jit_unget_reg(r1_reg);
 }
 
+/*
+ * Store in r0 the value of r1 with the order of its bits reversed.
+ *
+ * The value is copied to r0 and then reversed in place by swapping
+ * neighbouring groups of 1, 2, 4 and 8 bits under a mask, followed by
+ * the 16-bit halves (and, on 64-bit words, the 32-bit halves).  Three
+ * scratch registers are taken from the allocator and released before
+ * returning.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    /* Last step: the two halves trade places, no mask needed. */
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);              /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  else
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* Last step: the two halves trade places, no mask needed. */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Count leading ones of r1 into r0.  Without jit_cpu.lzcnt the generic
+ * fallback_clo() is used.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+       fallback_clo(r0, r1);
+       return;
+    }
+    /* Leading ones of r1 are the leading zeros of its complement. */
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/*
+ * Count leading zeros of r1 into r0.  Uses LZCNT when jit_cpu.lzcnt is
+ * set, fallback_clz() otherwise.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+#if __WORDSIZE == 32
+       jit_word_t              w;
+       /* Shift the operand left by 32 before counting. */
+       SLLXI(r1, 32, r0);
+       LZCNT(r0, r0);
+       /* A count of 31 or less is already the answer; otherwise halve it. */
+       w = blei(_jit->pc.w, r0, 31);
+       rshi(r0, r0, 1);        /* r0 is 64 */
+       patch_at(w, _jit->pc.w);
+#else
+       LZCNT(r1, r0);
+#endif
+    }
+    else
+       fallback_clz(r0, r1);
+}
+
+/*
+ * Count trailing ones of r1 into r0.  Without jit_cpu.lzcnt the generic
+ * fallback_cto() is used.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+       fallback_cto(r0, r1);
+       return;
+    }
+    /* Reverse the bits, then count what are now leading ones. */
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+/*
+ * Count trailing zeros of r1 into r0.  Without jit_cpu.lzcnt the generic
+ * fallback_ctz() is used.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+       fallback_ctz(r0, r1);
+       return;
+    }
+    /* Reverse the bits, then count what are now leading zeros. */
+    bitswap(r0, r1);
+    clzr(r0, r0);
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2310,7 +2451,7 @@ _bw(jit_state_t *_jit, jit_int32_t cc,
 #  if __WORDSIZE == 32
        B(cc, (i0 - w) >> 2);
 #  else
-       B(cc, (i0 - w) >> 2);
+       BP(cc, (i0 - w) >> 2);
 #  endif
        NOP();
     }
@@ -2430,14 +2571,15 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     NOP();
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
     jit_int32_t                reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (s22_p(w)) {
-       BA(w);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (s22_p(d)) {
+       BA(d);
        NOP();
     }
     else {
@@ -2446,6 +2588,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
        jmpr(rn(reg));
        jit_unget_reg(reg);
     }
+    return (w);
 }
 
 static jit_word_t
@@ -2467,17 +2610,19 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
     NOP();
 }
 
-static void
+/*
+ * Emit a call to the absolute address i0 and return the address of the
+ * emitted call so that the caller can patch it later.
+ *
+ * When the word displacement from the current pc fits s30_p(), a
+ * CALLI followed by a NOP is emitted and the pc captured before the
+ * emission is returned.  Otherwise the work is delegated to calli_p()
+ * and its return value is passed on.
+ */
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (s30_p(w)) {
-       CALLI(w);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (s30_p(d)) {
+       CALLI(d);
        NOP();
     }
     else
-       (void)calli_p(i0);
+       w = calli_p(i0);
+    return (w);
 }
 
 static jit_word_t
@@ -2551,24 +2696,24 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
 {
     if (_jitc->function->assume_frame)
        return;
-    /* (most) other backends do not save incoming arguments, so,
-     * only save locals here */
+    if (_jitc->function->allocar)
+       subi(_SP_REGNO, _FP_REGNO, _jitc->function->stack);
     if (jit_regset_tstbit(&_jitc->function->regset, _L0))
-       ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0));
+       ldxi(_L0_REGNO, _SP_REGNO, _jitc->function->stack + OFF(0));
     if (jit_regset_tstbit(&_jitc->function->regset, _L1))
-       ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1));
+       ldxi(_L1_REGNO, _SP_REGNO, _jitc->function->stack + OFF(1));
     if (jit_regset_tstbit(&_jitc->function->regset, _L2))
-       ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2));
+       ldxi(_L2_REGNO, _SP_REGNO, _jitc->function->stack + OFF(2));
     if (jit_regset_tstbit(&_jitc->function->regset, _L3))
-       ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3));
+       ldxi(_L3_REGNO, _SP_REGNO, _jitc->function->stack + OFF(3));
     if (jit_regset_tstbit(&_jitc->function->regset, _L4))
-       ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4));
+       ldxi(_L4_REGNO, _SP_REGNO, _jitc->function->stack + OFF(4));
     if (jit_regset_tstbit(&_jitc->function->regset, _L5))
-       ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5));
+       ldxi(_L5_REGNO, _SP_REGNO, _jitc->function->stack + OFF(5));
     if (jit_regset_tstbit(&_jitc->function->regset, _L6))
-       ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6));
+       ldxi(_L6_REGNO, _SP_REGNO, _jitc->function->stack + OFF(6));
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
-       ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7));
+       ldxi(_L7_REGNO, _SP_REGNO, _jitc->function->stack + OFF(7));
     RESTOREI(0, 0, 0);
     RETL();
     NOP();
@@ -2649,6 +2794,11 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        else
            abort();
     }
+    else if (i.op.b == 1) {
+       assert(s30_p((label - instr) >> 2));
+       i.disp30.b = (label - instr) >> 2;
+       u.i[0] = i.v;
+    }
     else
        abort();
 }
index 9531347..d0e7e81 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -498,6 +498,44 @@ _f3f(jit_state_t *_jit, jit_int32_t rd,
 }
 
 #  if __WORDSIZE == 64
+/* Handle the special case of using all float registers, as exercised
+ * in check/carg.c.
+ * For example:
+ *     putargr_f JIT_F0 $ARG
+ * where JIT_F0 is %f32 and $ARG is %f31 and if %f30 (the mapping for %f31)
+ * is live, the jit_get_reg() call might return %f30, but, because it is
+ * live, will spill/reload it, generating assembly:
+ *
+ *     std  %f30, [ %fp + OFFS ]
+ *     fmovd  %f32, %f30
+ *     fmovs  %f30, %f31
+ *     ldd  [ %fp + OFFS ], %f30
+ *
+ * which effectively becomes a no-op, as the reload restores the old value.
+ */
+#define get_sng_reg(u)         _get_sng_reg(_jit, u)
+/*
+ * Allocate a CLASS_SNG scratch register for a move whose destination is
+ * r0, avoiding the register numbered r0 - 1 whenever the allocation has
+ * to spill.  The caller releases the result with jit_unget_reg().
+ */
+static jit_int32_t
+_get_sng_reg(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                first, reg;
+    /* Preferred outcome: a register that needs no spill at all. */
+    reg = jit_get_reg(CLASS_SNG | jit_class_nospill | jit_class_chk);
+    if (reg != JIT_NOREG)
+       return (reg);
+    /* None free: accept a register that gets spilled and reloaded. */
+    reg = jit_get_reg(CLASS_SNG);
+    if (rn(reg) != r0 - 1)
+       return (reg);
+    /* The spilled register is the one paired with r0, so its reload
+     * would undo the move.  Take a second register while still holding
+     * the first, then drop the first.  The machine code is uglier (two
+     * spill/reload pairs, the first one a no-op) but correct. */
+    first = reg;
+    reg = jit_get_reg(CLASS_SNG);
+    jit_unget_reg(first);
+    return (reg);
+}
+
 static void
 _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -507,7 +545,7 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
            if (single_precision_p(r1))
                FMOVS(r1, r0);
            else {
-               t1 = jit_get_reg(CLASS_SNG);
+               t1 = get_sng_reg(r0);
                movr_d(rn(t1), r1);
                FMOVS(rn(t1), r0);
                jit_unget_reg(t1);
@@ -515,13 +553,13 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
        }
        else {
            if (single_precision_p(r1)) {
-               t0 = jit_get_reg(CLASS_SNG);
+               t0 = get_sng_reg(r0);
                FMOVS(r1, rn(t0));
                movr_d(r0, rn(t0));
                jit_unget_reg(t0);
            }
            else {
-               t1 = jit_get_reg(CLASS_SNG);
+               t1 = get_sng_reg(r0);
                movr_d(rn(t1), r1);
                FMOVS(rn(t1), rn(t1));
                movr_d(r0, rn(t1));
@@ -1491,7 +1529,12 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     assert(_jitc->function->self.call & jit_call_varargs);
 
     /* Load argument. */
+#if __WORDSIZE == 64
     ldr_d(r0, r1);
+#else
+    ldr_f(r0, r1);
+    ldxi_f(r0 + 1, r1, 4);
+#endif
 
     /* Update vararg stack pointer. */
     addi(r1, r1, 8);
index 265769d..95954d9 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 52
+#define JIT_INSTR_MAX 180
     0, /* data */
     0, /* live */
     0, /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
-    8, /* va_arg_d */
+    12,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     8, /* movi */
     16,        /* movnr */
     16,        /* movzr */
+    24,        /* casr */
+    32,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    0, /* bswapr_ul */
     8, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     12,        /* bxsubr_u */
     12,        /* bxsubi_u */
     8, /* jmpr */
-    16,        /* jmpi */
+    8, /* jmpi */
     8, /* callr */
-    16,        /* calli */
+    8, /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    24,        /* casr */
-    32,        /* casi */
+    176,       /* clo */
+    148,       /* clz */
+    180,       /* cto */
+    152,       /* ctz */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 216
     0, /* data */
     0, /* live */
-    4, /* align */
+    24,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     4, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     24,        /* movi */
     16,        /* movnr */
     16,        /* movzr */
+    24,        /* casr */
+    44,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     8, /* extr_i */
     8, /* extr_ui */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
     24,        /* ldi_c */
     4, /* ldr_uc */
-    24,        /* ldi_uc */
+    28,        /* ldi_uc */
     4, /* ldr_s */
-    24,        /* ldi_s */
+    28,        /* ldi_s */
     4, /* ldr_us */
-    24,        /* ldi_us */
+    28,        /* ldi_us */
     4, /* ldr_i */
-    24,        /* ldi_i */
+    28,        /* ldi_i */
     4, /* ldr_ui */
-    24,        /* ldi_ui */
+    28,        /* ldi_ui */
     4, /* ldr_l */
-    24,        /* ldi_l */
+    28,        /* ldi_l */
     4, /* ldxr_c */
     24,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_l */
     24,        /* ldxi_l */
     4, /* str_c */
-    24,        /* sti_c */
+    28,        /* sti_c */
     4, /* str_s */
-    24,        /* sti_s */
+    28,        /* sti_s */
     4, /* str_i */
-    24,        /* sti_i */
+    28,        /* sti_i */
     4, /* str_l */
-    24,        /* sti_l */
+    28,        /* sti_l */
     4, /* stxr_c */
     24,        /* stxi_c */
     4, /* stxr_s */
     12,        /* bxsubr_u */
     12,        /* bxsubi_u */
     8, /* jmpr */
-    32,        /* jmpi */
+    8, /* jmpi */
     8, /* callr */
-    32,        /* calli */
+    40,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     16,        /* truncr_f_l */
     20,        /* extr_f */
     12,        /* extr_d_f */
-    16,        /* movr_f */
+    24,        /* movr_f */
     32,        /* movi_f */
     8, /* ldr_f */
-    28,        /* ldi_f */
+    32,        /* ldi_f */
     8, /* ldxr_f */
     28,        /* ldxi_f */
     8, /* str_f */
-    28,        /* sti_f */
+    32,        /* sti_f */
     8, /* stxr_f */
     28,        /* stxi_f */
     20,        /* bltr_f */
     20,        /* bler_f */
     44,        /* blei_f */
     28,        /* beqr_f */
-    60,        /* beqi_f */
+    52,        /* beqi_f */
     20,        /* bger_f */
     44,        /* bgei_f */
     20,        /* bgtr_f */
     44,        /* bgti_f */
     20,        /* bner_f */
-    44,        /* bnei_f */
+    60,        /* bnei_f */
     20,        /* bunltr_f */
     44,        /* bunlti_f */
     20,        /* bunler_f */
     4, /* movr_d */
     32,        /* movi_d */
     4, /* ldr_d */
-    24,        /* ldi_d */
+    28,        /* ldi_d */
     4, /* ldxr_d */
     24,        /* ldxi_d */
     4, /* str_d */
-    24,        /* sti_d */
+    28,        /* sti_d */
     4, /* stxr_d */
     24,        /* stxi_d */
     12,        /* bltr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    116,       /* bswapr_ul */
-    24,        /* casr */
-    44,        /* casi */
+    216,       /* clo */
+    188,       /* clz */
+    204,       /* cto */
+    176,       /* ctz */
 #endif /* __WORDSIZE */
index cd45d23..9e837d8 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* Handling SIGILL should not be done by Lightning; either use this code
+ * as a sample, or use another approach to set jit_cpu.lzcnt.
+ */
+#define CHECK_LZCNT    0
+
+#if CHECK_LZCNT
+#include <signal.h>
+#include <setjmp.h>
+#endif
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 6)
 #if __WORDSIZE == 32
 #  define jit_arg_d_reg_p(i)           ((i) >= 0 && (i) < 5)
@@ -40,11 +50,13 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { 0x00,                            "%g0" },
     { 0x01,                            "%g1" },
@@ -147,13 +159,45 @@ jit_register_t            _rvs[] = {
 #  endif
     { _NOREG,                          "<none>" },
 };
+#if CHECK_LZCNT
+sigjmp_buf             jit_env;
+#endif
 
 /*
  * Implementation
  */
+#if CHECK_LZCNT
+static void
+sigill_handler(int signum)     /* SIGILL handler installed by jit_get_cpu() */
+{
+    jit_cpu.lzcnt = 0;         /* the probed instruction trapped */
+    siglongjmp(jit_env, 1);    /* back to the sigsetjmp(jit_env, 1) call */
+}
+#endif
+
 void
 jit_get_cpu(void)
 {
+#if CHECK_LZCNT
+    int                        g2;
+    struct             sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    if (old_action.sa_handler != SIG_IGN) {
+       sigaction(SIGILL, &new_action, NULL);
+       if (!sigsetjmp(jit_env, 1)) {
+           jit_cpu.lzcnt = 1;
+           /* lzcnt %g2, %g2 */
+           __asm__ volatile("mov %%g2, %0; .long 0xa3b0021; mov %0, %%g2"
+                            : "=r" (g2));
+           sigaction(SIGILL, &old_action, NULL);
+       }
+    }
+#else
+    jit_cpu.lzcnt = 0;
+#endif
 }
 
 void
@@ -184,7 +228,7 @@ _jit_prolog(jit_state_t *_jit)
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
     _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+       _jitc->function->self.alen = 0;
     /* float conversion */
 #  if __WORDSIZE == 32
     _jitc->function->self.aoff = -8;
@@ -265,20 +309,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -339,12 +381,13 @@ jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 #  if __WORDSIZE == 32
-    if (u->code == jit_code_arg || u->code == jit_code_arg_f)
+    if ((u->code >= jit_code_arg_c && u->code <= jit_code_arg) ||
+       u->code == jit_code_arg_f)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d);
     return (jit_arg_d_reg_p(u->u.w));
 #  else
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f);
     return (jit_arg_d_reg_p(u->u.w));
@@ -379,11 +422,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
@@ -394,7 +441,7 @@ _jit_arg(jit_state_t *_jit)
        offset = BIAS(_jitc->function->self.size);
        _jitc->function->self.size += sizeof(jit_word_t);
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -471,7 +518,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _I0 + v->u.w);
@@ -484,7 +531,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _I0 + v->u.w);
@@ -497,7 +544,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _I0 + v->u.w);
@@ -510,7 +557,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _I0 + v->u.w);
@@ -523,7 +570,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #  if __WORDSIZE == 64
@@ -542,7 +589,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _I0 + v->u.w);
@@ -555,7 +602,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _I0 + v->u.w);
@@ -566,10 +613,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 #  endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_I0 + v->u.w, u);
     else
@@ -578,11 +625,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_I0 + v->u.w, u);
     else {
@@ -795,9 +842,9 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_O0 + _jitc->function->call.argi, u);
@@ -816,10 +863,10 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                regno;
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_O0 + _jitc->function->call.argi, u);
@@ -1193,6 +1240,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1326,6 +1374,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1506,6 +1557,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
@@ -1723,7 +1778,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s22_p(word >> 2))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
@@ -1738,9 +1798,17 @@ _emit_code(jit_state_t *_jit)
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli_p(temp->u.w);
-                   if (!(temp->flag & jit_flag_patch))
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s30_p(word >> 2))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
+                   }
                }
                else
                    calli(node->u.w);
@@ -1749,6 +1817,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1769,6 +1838,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1793,11 +1872,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#if __WORDSIZE == 64
+          case jit_code_arg_l:
+#endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1807,10 +1898,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1882,6 +1989,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
index 1a473de..f0e4155 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
 #  else
 #    define il(l)                      ii(l)
 #  endif
-#  define patch_abs(instr, label)                                      \
-       *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
-#  define patch_rel(instr, label)                                      \
-       *(jit_int32_t *)(instr - 4) = label - instr
-#  define patch_rel_char(instr, label)                                 \
-       *(jit_int8_t *)(instr - 1) = label - instr
 #  define rex(l, w, r, x, b)           _rex(_jit, l, w, r, x, b)
 static void
 _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
@@ -186,7 +180,8 @@ static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #define addci(r0, r1, i0)              _addci(_jit, r0, r1, i0)
 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define iaddxr(r0, r1)               alur(X86_ADC, r0, r1)
+#  define iaddxr(r0, r1)               _iaddxr(_jit, r0, r1)
+static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
 static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define iaddxi(r0, i0)               alui(X86_ADC, r0, i0)
@@ -308,6 +303,14 @@ static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
 #    define decr(r0, r1)               _decr(_jit, r0, r1)
 static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  endif
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define cr(code, r0, r1, r2)         _cr(_jit, code, r0, r1, r2)
 static void
 _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
@@ -358,7 +361,13 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define imovi(r0, i0)                        _imovi(_jit, r0, i0)
 static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movi(r0, i0)                 _movi(_jit, r0, i0)
-static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
+static
+#  if CAN_RIP_ADDRESS
+jit_word_t
+#  else
+void
+#  endif
+_movi(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movcr(r0, r1)                        _movcr(_jit, r0, r1)
@@ -547,7 +556,7 @@ static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define jng(i0)                      jcc(X86_CC_NG, i0)
 #  define jg(i0)                       jcc(X86_CC_G, i0)
 #  define jnle(i0)                     jcc(X86_CC_NLE, i0)
-static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
 #  define jccs(code, i0)               _jccs(_jit, code, i0)
 #  define jos(i0)                      jccs(X86_CC_O, i0)
 #  define jnos(i0)                     jccs(X86_CC_NO, i0)
@@ -579,13 +588,15 @@ static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
 #  define jngs(i0)                     jccs(X86_CC_NG, i0)
 #  define jgs(i0)                      jccs(X86_CC_G, i0)
 #  define jnles(i0)                    jccs(X86_CC_NLE, i0)
-static void _jccs(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
 #  define jcr(code, i0, r0, r1)                _jcr(_jit, code, i0, r0, r1)
-static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _jcr(jit_state_t*,
+                      jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
 #  define jci(code, i0, r0, i1)                _jci(_jit, code, i0, r0, i1)
-static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+static jit_word_t _jci(jit_state_t*,
+                      jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
 #  define jci0(code, i0, r0)           _jci0(_jit, code, i0, r0)
-static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
+static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
 #  define bltr(i0, r0, r1)             _bltr(_jit, i0, r0, r1)
 static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define blti(i0, r0, i1)             _blti(_jit, i0, r0, i1)
@@ -687,7 +698,7 @@ static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 #    define jmpi_p(i0)                 jmpi(i0)
 #  endif
 #  define jmpsi(i0)                    _jmpsi(_jit, i0)
-static void _jmpsi(jit_state_t*, jit_uint8_t);
+static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
 #  define prolog(node)                 _prolog(_jit, node)
 static void _prolog(jit_state_t*, jit_node_t*);
 #  define epilog(node)                 _epilog(_jit, node)
@@ -698,8 +709,8 @@ static void _vastart(jit_state_t*, jit_int32_t);
 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define vaarg_d(r0, r1, i0)          _vaarg_d(_jit, r0, r1, i0)
 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
-#  define patch_at(node, instr, label) _patch_at(_jit, node, instr, label)
-static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
+#  define patch_at(instr, label)       _patch_at(_jit, instr, label)
+static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
 #  if !defined(HAVE_FFSL)
 #    if __X32
 #      define ffsl(i)                  __builtin_ffs(i)
@@ -735,11 +746,16 @@ _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
 {
     if (ri == _NOREG) {
        if (rb == _NOREG) {
-#if __X32
-           mrm(0x00, r7(rd), 0x05);
-#else
-           mrm(0x00, r7(rd), 0x04);
-           sib(_SCL1, 0x04, 0x05);
+           /* Use ms == _SCL8 to tell it is a %rip relative displacement */
+#if __X64
+           if (ms == _SCL8)
+#endif
+               mrm(0x00, r7(rd), 0x05);
+#if __X64
+           else {
+               mrm(0x00, r7(rd), 0x04);
+               sib(_SCL1, 0x04, 0x05);
+           }
 #endif
            ii(md);
        }
@@ -1036,6 +1052,49 @@ _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+static void
+_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Computes r0 = r0 + r1 + carry.  The disabled ADCX variant below was
+     * first tried with r1 in ModRM.reg, giving for the simple test case:
+     *         mov  $0xffffffffffffffff, %rax  -- rax = 0xffffffffffffffff (-1)
+     *         mov  $0xffffffffffffffff, %r10  -- r10 = 0xffffffffffffffff (-1)
+     *         mov  $0x1, %r11d                -- r11 = 1
+     *         xor  %rbx, %rbx                 -- rbx = 0
+     * (gdb) p $eflags
+     * $1 = [ PF ZF IF ]
+     *         add  %r11, %rax                 -- rax = 0x10000000000000000 (0)
+     *                             does not fit in 64 bit ^
+     * (gdb) p $eflags
+     * $2 = [ CF PF AF ZF IF ]
+     *         adcx %r10, %rbx                 -- r10 = 0xffffffffffffffff (-1)
+     * (gdb) p $eflags
+     * $3 = [ CF PF AF ZF IF ]
+     * (gdb) p/x $r10
+     * $4 = 0xffffffffffffffff
+     * r10 was expected to be zero: -1 (%r10) + 0 (%rbx) + carry.  However
+     * ADCX (66 REX.W 0F 38 F6 /r) writes its ModRM.reg operand, so that
+     * encoding updated r1 (%rbx) instead of r0 (%r10).  The operands are
+     * now swapped so that r0 is the destination.
+     * FIXME: still disabled; needs testing on hardware with ADX.
+     */
+#if 0
+    /* Significantly longer instruction, but avoid cpu stalls as only
+     * the carry flag is used in a sequence. */
+    if (jit_cpu.adx) {
+       /* ADCX */
+       ic(0x66);
+       rex(0, WIDE, r0, _NOREG, r1);
+       ic(0x0f);
+       ic(0x38);
+       ic(0xf6);
+       mrm(0x03, r7(r0), r7(r1));
+    }
+    else
+#endif
+       alur(X86_ADC, r0, r1);
+}
+
 static void
 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -1051,7 +1110,12 @@ static void
 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+    if (
+#if 0
+       /* Do not mix ADC and ADCX */
+       !jit_cpu.adx &&
+#endif
+       can_sign_extend_int_p(i0)) {
        movr(r0, r1);
        iaddxi(r0, i0);
     }
@@ -1913,6 +1977,88 @@ _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);      /* presumably r0 = ~r1 (comr is defined elsewhere) */
+    clzr(r0, r0);      /* leading zeros of the complement = leading ones of r1 */
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, x;   /* the two short jumps patched below (BSR path) */
+    /* r0 = leading zero count of r1; with ABM the F3 prefix selects LZCNT */
+    if (jit_cpu.abm)
+       ic(0xf3);
+    /* else the bare 0F BD opcode is BSR, which yields a bit index */
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xbd);
+    mrm(0x3, r7(r0), r7(r1));
+    if (!jit_cpu.abm) {
+       /* jump if undefined: BSR sets ZF and leaves r0 undefined for r1 == 0 */
+       w = jccs(X86_CC_E, _jit->pc.w);
+       /* bit index -> leading zero count: r0 = (__WORDSIZE - 1) - r0 */
+       rsbi(r0, r0, __WORDSIZE - 1);
+       /* done: skip the zero fixup (the target passed here is a placeholder) */
+       x = jmpsi(_jit->pc.w);
+       /* r1 == 0: the conditional jump lands here */
+       patch_at(w, _jit->pc.w);
+       movi(r0, __WORDSIZE);
+       /* r1 != 0: the unconditional jump lands here */
+       patch_at(x, _jit->pc.w);
+    }
+    /* LZCNT has defined behavior for value zero and counts leading zeros */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);      /* presumably r0 = ~r1 (comr is defined elsewhere) */
+    ctzr(r0, r0);      /* trailing zeros of the complement = trailing ones of r1 */
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;      /* short jump patched in the no-CMOV fallback */
+    jit_int32_t                t0;     /* scratch preloaded with __WORDSIZE, or _NOREG */
+    if (!jit_cpu.abm) {
+       if (jit_cmov_p())
+           t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
+       else
+           t0 = _NOREG;
+       if (t0 != _NOREG)
+           movi(rn(t0), __WORDSIZE);
+    }
+    /* r0 = trailing zero count of r1; with ABM the F3 prefix selects TZCNT */
+    if (jit_cpu.abm)
+       ic(0xf3);       /* NOTE(review): TZCNT is BMI1; confirm abm implies it */
+    /* else the bare 0F BC opcode is BSF: ZF set, r0 undefined for r1 == 0 */
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xbc);
+    mrm(0x3, r7(r0), r7(r1));
+    if (!jit_cpu.abm) {
+       /* No conditional move or need spill/reload a temporary */
+       if (t0 == _NOREG) {
+           w = jccs(X86_CC_NE, _jit->pc.w);    /* ZF clear: r0 is valid, skip fixup */
+           movi(r0, __WORDSIZE);               /* r1 == 0: defined result */
+           patch_at(w, _jit->pc.w);
+       }
+       else {
+           /* CMOVE: r0 = t0 (__WORDSIZE) only if ZF is set, i.e. r1 == 0 */
+           rex(0, WIDE, r0, _NOREG, rn(t0));
+           ic(0x0f);
+           ic(0x44);
+           mrm(0x3, r7(r0), r7(rn(t0)));
+           jit_unget_reg(t0);
+       }
+    }
+    /* TZCNT has defined behavior for value zero */
+}
+
 static void
 _cr(jit_state_t *_jit,
     jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
@@ -2162,6 +2308,12 @@ _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        ii(i0);
 #  if !__X64_32
     }
+    else if (can_sign_extend_int_p(i0)) {
+       rex(0, 1, _NOREG, _NOREG, r0);
+       ic(0xc7);
+       ic(0xc0 | r7(r0));
+       ii(i0);
+    }
     else {
        rex(0, 1, _NOREG, _NOREG, r0);
        ic(0xb8 | r7(r0));
@@ -2174,22 +2326,45 @@ _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 #endif
 }
 
+#if CAN_RIP_ADDRESS
+static jit_word_t
+#else
 static void
+#endif
 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
+#if CAN_RIP_ADDRESS
+    jit_word_t         w, rel; /* w: code address handed back to the caller */
+    w = _jit->pc.w;            /* default: where imovi/ixorr below will start */
+    rel = i0 - (w + 8);
+    rel = rel < 0 ? rel - 8 : rel + 8; /* 8 more bytes of slack in either direction */
+    if (can_sign_extend_int_p(rel)) {
+       /* lea rel(%rip), %r0 -- rel passed the can_sign_extend_int_p() test */
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       w = _jit->pc.w;         /* return the opcode address, past the REX prefix */
+       ic(0x8d);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
     if (i0)
        imovi(r0, i0);
     else
        ixorr(r0, r0);
+#if CAN_RIP_ADDRESS
+    return (w);
+#endif
 }
 
 static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
+    jit_word_t         w;
     rex(0, WIDE, _NOREG, _NOREG, r0);
+    w = _jit->pc.w;            /* address of the opcode byte, past the REX prefix */
     ic(0xb8 | r7(r0));
     il(i0);
-    return (_jit->pc.w);
+    return (w);                /* previously the end of the instruction was returned */
 }
 
 static void
@@ -2404,7 +2579,18 @@ static void
 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xbe);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbe);
@@ -2431,7 +2617,18 @@ static void
 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xb6);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb6);
@@ -2458,7 +2655,18 @@ static void
 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xbf);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbf);
@@ -2485,7 +2693,18 @@ static void
 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xb7);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb7);
@@ -2516,7 +2735,17 @@ static void
 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x63);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
 #if __X64
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x63);
@@ -2547,7 +2776,17 @@ static void
 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#  if !__X64_32
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, 0, r0, _NOREG, _NOREG);
+       ic(0x63);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x63);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2555,7 +2794,11 @@ _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
+#  if __X64_32
+       ldr_i(r0, rn(reg));
+#  else
        ldr_ui(r0, rn(reg));
+#  endif
        jit_unget_reg(reg);
     }
 }
@@ -2573,8 +2816,15 @@ static void
 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x8b);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else if (can_sign_extend_int_p(i0)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x8b);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
     }
@@ -2778,7 +3028,11 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
+#  if __X64_32
+       ldxr_i(r0, r1, rn(reg));
+#  else
        ldxr_ui(r0, r1, rn(reg));
+#  endif
        jit_unget_reg(reg);
     }
 }
@@ -2834,7 +3088,27 @@ static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 16 : rel + 16;
+    if (can_sign_extend_int_p(rel)) {
+       if (reg8_p(r0)) {
+           rex(0, 0, r0, _NOREG, _NOREG);
+           ic(0x88);
+           rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
+           movr(rn(reg), r0);
+           rex(0, 0, rn(reg), _NOREG, _NOREG);
+           ic(0x88);
+           rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+#endif
+    if (address_p(i0)) {
        if (reg8_p(r0)) {
            rex(0, 0, r0, _NOREG, _NOREG);
            ic(0x88);
@@ -2870,7 +3144,18 @@ static void
 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       ic(0x66);
+       rex(0, 0, r0, _NOREG, _NOREG);
+       ic(0x89);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        ic(0x66);
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x89);
@@ -2896,7 +3181,17 @@ static void
 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, 0, r0, _NOREG, _NOREG);
+       ic(0x89);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x89);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2922,8 +3217,18 @@ static void
 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x89);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x89);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
     }
@@ -3084,208 +3389,221 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
-static void
+static jit_word_t
 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
+    jit_word_t         d;      /* rel8 displacement */
     jit_word_t         w;
+    w = _jit->pc.w;            /* address of the jump, returned for patching */
+    d = i0 - (w + 2);          /* relative to the end of the 2-byte opcode+rel8 */
     ic(0x70 | code);
-    w = i0 - (_jit->pc.w + 1);
-    ic(w);
+    ic(d);
+    return (w);
 }
 
-static void
+static jit_word_t
 _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
+    jit_word_t         d;      /* rel32 displacement */
     jit_word_t         w;
+    w = _jit->pc.w;            /* address of the jump, returned to the caller */
     ic(0x0f);
+    d = i0 - (w + 6);          /* relative to the end: 2 opcode bytes + rel32 */
     ic(0x80 | code);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
+    ii(d);
+    return (w);
 }
 
-static void
+static jit_word_t
 _jcr(jit_state_t *_jit,
      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     alur(X86_CMP, r0, r1);
-    jcc(code, i0);
+    return (jcc(code, i0));
 }
 
-static void
+static jit_word_t
 _jci(jit_state_t *_jit,
      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     alui(X86_CMP, r0, i1);
-    jcc(code, i0);
+    return (jcc(code, i0));
 }
 
-static void
+static jit_word_t
 _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
 {
     testr(r0, r0);
-    jcc(code, i0);
+    return (jcc(code, i0));
 }
 
 static jit_word_t
 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_L, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_L, i0, r0, r1));
 }
 
 static jit_word_t
 _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_L, i0, r0, i1);
-    else               jci0(X86_CC_S, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_L, i0, r0, i1);
+    else               w = jci0(X86_CC_S, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_B, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_B, i0, r0, r1));
 }
 
 static jit_word_t
 _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_B, i0, r0, i1);
-    else               jci0(X86_CC_B, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_B, i0, r0, i1);
+    else               w = jci0(X86_CC_B, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_LE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_LE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_LE, i0, r0, i1);
-    else               jci0(X86_CC_LE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_LE, i0, r0, i1);
+    else               w = jci0(X86_CC_LE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_BE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_BE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_BE, i0, r0, i1);
-    else               jci0(X86_CC_BE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_BE, i0, r0, i1);
+    else               w = jci0(X86_CC_BE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_E, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_E, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_E, i0, r0, i1);
-    else               jci0(X86_CC_E, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_E, i0, r0, i1);
+    else               w = jci0(X86_CC_E, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_GE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_GE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_GE, i0, r0, i1);
-    else               jci0(X86_CC_NS, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_GE, i0, r0, i1);
+    else               w = jci0(X86_CC_NS, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_AE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_AE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_AE, i0, r0, i1);
-    else               jmpi(i0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_AE, i0, r0, i1);
+    else               w = jmpi(i0);
+    return (w);
 }
 
 static jit_word_t
 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_G, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_G, i0, r0, r1));
 }
 
 static jit_word_t
 _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    jci(X86_CC_G, i0, r0, i1);
-    return (_jit->pc.w);
+    return (jci(X86_CC_G, i0, r0, i1));
 }
 
 static jit_word_t
 _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_A, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_A, i0, r0, r1));
 }
 
 static jit_word_t
 _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_A, i0, r0, i1);
-    else               jci0(X86_CC_NE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_A, i0, r0, i1);
+    else               w = jci0(X86_CC_NE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_NE, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_NE, i0, r0, r1));
 }
 
 static jit_word_t
 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_NE, i0, r0, i1);
-    else               jci0(X86_CC_NE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_NE, i0, r0, i1);
+    else               w = jci0(X86_CC_NE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     testr(r0, r1);
-    jnz(i0);
-    return (_jit->pc.w);
+    return (jnz(i0));
 }
 
 static jit_word_t
@@ -3300,16 +3618,14 @@ _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        testr(r0, rn(reg));
        jit_unget_reg(reg);
     }
-    jnz(i0);
-    return (_jit->pc.w);
+    return (jnz(i0));
 }
 
 static jit_word_t
 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     testr(r0, r1);
-    jz(i0);
-    return (_jit->pc.w);
+    return (jz(i0));
 }
 
 static jit_word_t
@@ -3324,16 +3640,14 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        testr(r0, rn(reg));
        jit_unget_reg(reg);
     }
-    jz(i0);
-    return (_jit->pc.w);
+    return (jz(i0));
 }
 
 static jit_word_t
 _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jo(i0);
-    return (_jit->pc.w);
+    return (jo(i0));
 }
 
 static jit_word_t
@@ -3342,8 +3656,7 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jo(i0);
-       return (_jit->pc.w);
+       return (jo(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3355,8 +3668,7 @@ static jit_word_t
 _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jc(i0);
-    return (_jit->pc.w);
+    return (jc(i0));
 }
 
 static jit_word_t
@@ -3365,8 +3677,7 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jc(i0);
-       return (_jit->pc.w);
+       return (jc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3378,8 +3689,7 @@ static jit_word_t
 _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jno(i0);
-    return (_jit->pc.w);
+    return (jno(i0));
 }
 
 static jit_word_t
@@ -3388,8 +3698,7 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jno(i0);
-       return (_jit->pc.w);
+       return (jno(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3401,8 +3710,7 @@ static jit_word_t
 _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jnc(i0);
-    return (_jit->pc.w);
+    return (jnc(i0));
 }
 
 static jit_word_t
@@ -3411,8 +3719,7 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jnc(i0);
-       return (_jit->pc.w);
+       return (jnc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3424,8 +3731,7 @@ static jit_word_t
 _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jo(i0);
-    return (_jit->pc.w);
+    return (jo(i0));
 }
 
 static jit_word_t
@@ -3434,8 +3740,7 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jo(i0);
-       return (_jit->pc.w);
+       return (jo(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3447,8 +3752,7 @@ static jit_word_t
 _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jc(i0);
-    return (_jit->pc.w);
+    return (jc(i0));
 }
 
 static jit_word_t
@@ -3457,8 +3761,7 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jc(i0);
-       return (_jit->pc.w);
+       return (jc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3470,8 +3773,7 @@ static jit_word_t
 _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jno(i0);
-    return (_jit->pc.w);
+    return (jno(i0));
 }
 
 static jit_word_t
@@ -3480,8 +3782,7 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jno(i0);
-       return (_jit->pc.w);
+       return (jno(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3493,8 +3794,7 @@ static jit_word_t
 _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jnc(i0);
-    return (_jit->pc.w);
+    return (jnc(i0));
 }
 
 static jit_word_t
@@ -3503,8 +3803,7 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jnc(i0);
-       return (_jit->pc.w);
+       return (jnc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3523,35 +3822,39 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
 static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
     jit_word_t         w;
+    jit_word_t         d;      /* rel32 displacement to i0 */
+    jit_word_t         l = _jit->pc.w + 5;     /* end of a 5-byte E8 + rel32 call */
+    d = i0 - l;
 #if __X64
-    w = i0 - (_jit->pc.w + 5);
-    if ((jit_int32_t)w == w) {
+    if (
+#  if __X64_32
+       !((d < 0) ^ (l < 0)) && /* NOTE(review): d and l must share a sign; confirm intent */
+#  endif
+       (jit_int32_t)d == d) {  /* d survives truncation to 32 bits */
 #endif
+       w = _jit->pc.w;         /* return the address of the E8 opcode */
        ic(0xe8);
-       w = i0 - (_jit->pc.w + 4);
-       ii(w);
-       word = _jit->pc.w;
+       ii(d);
 #if __X64
     }
     else
-       word = calli_p(i0);
+       w = calli_p(i0);        /* out of range: call through a register instead */
 #endif
-    return (word);
+    return (w);
 }
 
 #if __X64
 static jit_word_t
 _calli_p(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
+    jit_word_t         w;
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
-    word = movi_p(rn(reg), i0);
+    w = movi_p(rn(reg), i0);
     callr(rn(reg));
     jit_unget_reg(reg);
-    return (word);
+    return (w);
 }
 #endif
 
@@ -3566,51 +3869,58 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
 static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
     jit_word_t         w;
+    jit_word_t         d;      /* rel32 displacement to i0 */
+    jit_word_t         l = _jit->pc.w + 5;     /* end of a 5-byte E9 + rel32 jump */
+    d = i0 - l;
 #if __X64
-    w = i0 - (_jit->pc.w + 5);
-    if ((jit_int32_t)w == w) {
+    if (
+#  if __X64_32
+       !((d < 0) ^ (l < 0)) && /* NOTE(review): d and l must share a sign; confirm intent */
+#  endif
+       (jit_int32_t)d == d) {  /* d survives truncation to 32 bits */
 #endif
+       w = _jit->pc.w;         /* return the address of the E9 opcode */
        ic(0xe9);
-       w = i0 - (_jit->pc.w + 4);
-       ii(w);
-       word = _jit->pc.w;
+       ii(d);
 #if __X64
     }
     else
-       word = jmpi_p(i0);
+       w = jmpi_p(i0);         /* out of range: jump through a register instead */
 #endif
-    return (word);
+    return (w);
 }
 
 #if __X64
 static jit_word_t
 _jmpi_p(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
+    jit_word_t         w;
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    word = movi_p(rn(reg), i0);
+    w = movi_p(rn(reg), i0);
     jmpr(rn(reg));
     jit_unget_reg(reg);
-    return (word);
+    return (w);
 }
 #endif
 
-static void
+static jit_word_t
 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
 {
+    jit_word_t         w = _jit->pc.w; /* address of the EB opcode */
     ic(0xeb);
     ic(i0);
+    return (w);                /* lets the caller patch the rel8 byte later */
 }
 
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -3623,76 +3933,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                                (_jitc->function->self.alen > 32 ?
                                 _jitc->function->self.alen : 32) -
                                /* align stack at 16 bytes */
-                               _jitc->function->self.aoff) + 15) & -16) +
-       stack_adjust;
+                               _jitc->function->self.aoff) + 15) & -16);
 #else
     _jitc->function->stack = (((_jitc->function->self.alen -
-                              _jitc->function->self.aoff) + 15) & -16) +
-       stack_adjust;
+                              _jitc->function->self.aoff) + 15) & -16);
 #endif
-    subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
     /* callee save registers */
-#if __X32
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       stxi(12, _RSP_REGNO, _RDI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       stxi( 8, _RSP_REGNO, _RSI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       stxi( 4, _RSP_REGNO, _RBX_REGNO);
-#else
-#  if __CYGWIN__ || _WIN32
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
-       sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
-       sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
-       sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
-       sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
-       sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
-       sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
-       sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
-       sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
-       sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
-       sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       stxi(56, _RSP_REGNO, _R15_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       stxi(48, _RSP_REGNO, _R14_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       stxi(40, _RSP_REGNO, _R13_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       stxi(32, _RSP_REGNO, _R12_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       stxi(24, _RSP_REGNO, _RSI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       stxi(16, _RSP_REGNO, _RDI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       stxi( 8, _RSP_REGNO, _RBX_REGNO);
-#  else
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       stxi(40, _RSP_REGNO, _RBX_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       stxi(32, _RSP_REGNO, _R12_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       stxi(24, _RSP_REGNO, _R13_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       stxi(16, _RSP_REGNO, _R14_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       stxi( 8, _RSP_REGNO, _R15_REGNO);
-#  endif
+    for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _RSP_REGNO, rn(iregs[reg]));
+           offs += REAL_WORDSIZE;
+       }
+    }
+#if __X64 && (__CYGWIN__ || _WIN32)
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
 #endif
-    stxi(0, _RSP_REGNO, _RBP_REGNO);
-    movr(_RBP_REGNO, _RSP_REGNO);
+
+    if (_jitc->function->need_frame) {
+       stxi(0, _RSP_REGNO, _RBP_REGNO);
+       movr(_RBP_REGNO, _RSP_REGNO);
+    }
 
     /* alloca */
-    subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
+    if (_jitc->function->stack)
+       subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), _jitc->function->self.aoff);
@@ -3716,8 +4001,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            /* test %al, %al */
            ic(0x84);
            ic(0xc0);
-           jes(0);
-           nofp_code = _jit->pc.w;
+           nofp_code = jes(0);
 
            /* Save fp registers in the save area, if any is a vararg */
            /* Note that the full 16 byte xmm is not saved, because
@@ -3728,7 +4012,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
                           reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
 
-           patch_rel_char(nofp_code, _jit->pc.w);
+           patch_at(nofp_code, _jit->pc.w);
        }
     }
 #endif
@@ -3737,68 +4021,38 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
+    if (_jitc->function->need_frame)
+       movr(_RSP_REGNO, _RBP_REGNO);
+
     /* callee save registers */
-    movr(_RSP_REGNO, _RBP_REGNO);
-#if __X32
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       ldxi(_RDI_REGNO, _RSP_REGNO, 12);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       ldxi(_RSI_REGNO, _RSP_REGNO,  8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       ldxi(_RBX_REGNO, _RSP_REGNO,  4);
-#else
-#  if __CYGWIN__ || _WIN32
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
-       sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
-       sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
-       sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
-       sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
-       sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
-       sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
-       sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
-       sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
-       sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
-       sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       ldxi(_R15_REGNO, _RSP_REGNO, 56);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       ldxi(_R14_REGNO, _RSP_REGNO, 48);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       ldxi(_R13_REGNO, _RSP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       ldxi(_R12_REGNO, _RSP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       ldxi(_RSI_REGNO, _RSP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       ldxi(_RDI_REGNO, _RSP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       ldxi(_RBX_REGNO, _RSP_REGNO,  8);
-#  else
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       ldxi(_RBX_REGNO, _RSP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       ldxi(_R12_REGNO, _RSP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       ldxi(_R13_REGNO, _RSP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       ldxi(_R14_REGNO, _RSP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       ldxi(_R15_REGNO, _RSP_REGNO,  8);
-#  endif
+    for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
+           offs += REAL_WORDSIZE;
+       }
+    }
+#if __X64 && (__CYGWIN__ || _WIN32)
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
 #endif
-    ldxi(_RBP_REGNO, _RSP_REGNO, 0);
-    addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+    if (_jitc->function->need_frame) {
+       ldxi(_RBP_REGNO, _RSP_REGNO, 0);
+       addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
+    }
+    /* This condition does not happen as much as expected because
+     * it is not safe to not create a frame pointer if any function
+     * is called, even jit functions, as those might call external
+     * functions. */
+    else if (_jitc->function->need_stack)
+       addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
 
     ic(0xc3);
 }
@@ -3808,7 +4062,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
 #if __X32 || __CYGWIN__ || _WIN32
     assert(_jitc->function->self.call & jit_call_varargs);
-    addi(r0, _RBP_REGNO, _jitc->function->self.size);
+    addi(r0, _RBP_REGNO, jit_selfsize());
 #else
     jit_int32_t                reg;
 
@@ -3827,7 +4081,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
 
     /* Initialize overflow pointer to the first stack argument. */
-    addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
+    addi(rn(reg), _RBP_REGNO, jit_selfsize());
     stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
 
     /* Initialize register save area pointer. */
@@ -3861,8 +4115,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 
     /* Jump over if there are no remaining arguments in the save area. */
     icmpi(rn(rg0), va_gp_max_offset);
-    jaes(0);
-    ge_code = _jit->pc.w;
+    ge_code = jaes(0);
 
     /* Load the save area pointer in the second temporary. */
     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
@@ -3878,11 +4131,10 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    jmpsi(0);
-    lt_code = _jit->pc.w;
+    lt_code = jmpsi(0);
 
     /* Where to land if argument is in overflow area. */
-    patch_rel_char(ge_code, _jit->pc.w);
+    patch_at(ge_code, _jit->pc.w);
 
     /* Load overflow pointer. */
     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
@@ -3895,7 +4147,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
 
     /* Where to land if argument is in save area. */
-    patch_rel_char(lt_code, _jit->pc.w);
+    patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
 #endif
@@ -3929,8 +4181,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
 
     /* Jump over if there are no remaining arguments in the save area. */
     icmpi(rn(rg0), va_fp_max_offset);
-    jaes(0);
-    ge_code = _jit->pc.w;
+    ge_code = jaes(0);
 
     /* Load the save area pointer in the second temporary. */
     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
@@ -3949,11 +4200,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    jmpsi(0);
-    lt_code = _jit->pc.w;
+    lt_code = jmpsi(0);
 
     /* Where to land if argument is in overflow area. */
-    patch_rel_char(ge_code, _jit->pc.w);
+    patch_at(ge_code, _jit->pc.w);
 
     /* Load overflow pointer. */
     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
@@ -3969,27 +4219,57 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
 
     /* Where to land if argument is in save area. */
-    patch_rel_char(lt_code, _jit->pc.w);
+    patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
 #endif
 }
 
 static void
-_patch_at(jit_state_t *_jit, jit_node_t *node,
-         jit_word_t instr, jit_word_t label)
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
 {
-    switch (node->code) {
-#  if __X64
-       case jit_code_calli:
-       case jit_code_jmpi:
-#  endif
-       case jit_code_movi:
-           patch_abs(instr, label);
+    jit_word_t          disp;
+    jit_uint8_t                *code = (jit_uint8_t *)instr;
+    ++instr;
+    switch (code[0]) {
+       /* movi_p */
+       case 0xb8 ... 0xbf:
+           *(jit_word_t *)instr = label;
            break;
-       default:
-           patch_rel(instr, label);
+           /* forward pc relative address known to be in range */
+#if CAN_RIP_ADDRESS
+       /* movi */
+       case 0x8d:
+           ++instr;
+           goto apply;
+#endif
+       /* jcc */
+       case 0x0f:
+           ++instr;
+           if (code[1] < 0x80 || code[1] > 0x8f)
+               goto fail;
+       /* calli */
+       case 0xe8:
+       /* jmpi */
+       case 0xe9:
+#if CAN_RIP_ADDRESS
+       apply:
+#endif
+           disp = label - (instr + 4);
+           assert((jit_int32_t)disp == disp);
+           *(jit_int32_t *)instr = disp;
+           break;
+           /* jccs */
+       case 0x70 ... 0x7f:
+           /* jmpsi */
+       case 0xeb:
+           disp = label - (instr + 1);
+           assert((jit_int8_t)disp == disp);
+           *(jit_int8_t *)instr = disp;
            break;
+       default:
+       fail:
+           abort();
     }
 }
 #endif
index 4447a52..c3ac895 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  */
 
 #if PROTO
-#  if __X32
-#    define sse_address_p(i0)          1
-#  else
-#    if __X64_32
-#      define sse_address_p(i0)                ((jit_word_t)(i0) >= 0)
-#    else
-#      define sse_address_p(i0)                can_sign_extend_int_p(i0)
-#    endif
-#  endif
 #  define _XMM6_REGNO                  6
 #  define _XMM7_REGNO                  7
 #  define _XMM8_REGNO                  8
@@ -470,14 +461,14 @@ _sse_b##name##i_##type(jit_state_t *_jit,                         \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
 {                                                                      \
-    jit_word_t         word;                                           \
+    jit_word_t         w;                                              \
     jit_int32_t                reg = jit_get_reg(jit_class_fpr|jit_class_xpr|  \
                                          jit_class_nospill);           \
     assert(jit_sse_reg_p(reg));                                                \
     sse_movi_##type(rn(reg), i1);                                      \
-    word = sse_b##name##r_##type(i0, r0, rn(reg));                     \
+    w = sse_b##name##r_##type(i0, r0, rn(reg));                                \
     jit_unget_reg(reg);                                                        \
-    return (word);                                                     \
+    return (w);                                                                \
 }
 #  define fopi(name)                   fpr_opi(name, f, 32)
 #  define fbopi(name)                  fpr_bopi(name, f, 32)
@@ -809,8 +800,17 @@ _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
        ldi = !_jitc->no_data;
 #if __X64
        /* if will allocate a register for offset, just use immediate */
-       if (ldi && !sse_address_p(i0))
+#  if CAN_RIP_ADDRESS
+       if (ldi) {
+           jit_word_t  rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+           ldi = can_sign_extend_int_p(rel);
+           if (!ldi && address_p(i0))
+               ldi = 1;
+       }
+#  else
+       if (ldi && !address_p(i0))
            ldi = 0;
+#  endif
 #endif
        if (ldi)
            sse_ldi_f(r0, (jit_word_t)i0);
@@ -840,10 +840,9 @@ _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     ixorr(reg, reg);
     ucomissr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
@@ -866,10 +865,9 @@ _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     imovi(reg, 1);
     ucomissr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
@@ -928,7 +926,13 @@ static void
 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
+    else
+#endif
+    if (address_p(i0))
        movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -975,7 +979,13 @@ static void
 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
+    else
+#endif
+    if (address_p(i0))
        movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1022,8 +1032,7 @@ static jit_word_t
 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 fbopi(lt)
 
@@ -1031,21 +1040,20 @@ static jit_word_t
 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 fbopi(le)
 
 static jit_word_t
 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     ucomissr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    je(i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    w = je(i0);
+    patch_at(jp_code, _jit->pc.w);
+    return (w);
 }
 fbopi(eq)
 
@@ -1053,8 +1061,7 @@ static jit_word_t
 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 fbopi(ge)
 
@@ -1062,25 +1069,23 @@ static jit_word_t
 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 fbopi(gt)
 
 static jit_word_t
 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     jit_word_t         jz_code;
     ucomissr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    jz_code = jzs(0);
+    patch_at(jp_code, _jit->pc.w);
+    w = jmpi(i0);
+    patch_at(jz_code, _jit->pc.w);
+    return (w);
 }
 fbopi(ne)
 
@@ -1088,47 +1093,49 @@ static jit_word_t
 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 fbopi(unlt)
 
 static jit_word_t
 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomissr(r0, r1);
-       jna(i0);
+       w = jna(i0);
     }
-    return (_jit->pc.w);
+    return (w);
 }
 fbopi(unle)
 
 static jit_word_t
 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomissr(r0, r1);
-       je(i0);
+       w = je(i0);
     }
-    return (_jit->pc.w);
+    return (w);
 }
 fbopi(uneq)
 
 static jit_word_t
 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomissr(r1, r0);
-       jna(i0);
+       w = jna(i0);
     }
-    return (_jit->pc.w);
+    return (w);
 }
 fbopi(unge)
 
@@ -1136,8 +1143,7 @@ static jit_word_t
 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 fbopi(ungt)
 
@@ -1145,8 +1151,7 @@ static jit_word_t
 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jne(i0);
-    return (_jit->pc.w);
+    return (jne(i0));
 }
 fbopi(ltgt)
 
@@ -1154,8 +1159,7 @@ static jit_word_t
 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jnp(i0);
-    return (_jit->pc.w);
+    return (jnp(i0));
 }
 fbopi(ord)
 
@@ -1163,8 +1167,7 @@ static jit_word_t
 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jp(i0);
-    return (_jit->pc.w);
+    return (jp(i0));
 }
 fbopi(unord)
 
@@ -1185,10 +1188,9 @@ _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     ixorr(reg, reg);
     ucomisdr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1211,10 +1213,9 @@ _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     imovi(reg, 1);
     ucomisdr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1294,8 +1295,17 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
        ldi = !_jitc->no_data;
 #if __X64
        /* if will allocate a register for offset, just use immediate */
-       if (ldi && !sse_address_p(i0))
+#  if CAN_RIP_ADDRESS
+       if (ldi) {
+           jit_word_t  rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+           ldi = can_sign_extend_int_p(rel);
+           if (!ldi && address_p(i0))
+               ldi = 1;
+       }
+#  else
+       if (ldi && !address_p(i0))
            ldi = 0;
+#  endif
 #endif
        if (ldi)
            sse_ldi_d(r0, (jit_word_t)i0);
@@ -1306,6 +1316,7 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
            movdqxr(r0, rn(reg));
            jit_unget_reg(reg);
 #else
+           CHECK_CVT_OFFSET();
            movi(rn(reg), data.ii[0]);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
            movi(rn(reg), data.ii[1]);
@@ -1321,7 +1332,13 @@ static void
 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
+    else
+#endif
+    if (address_p(i0))
        movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1368,7 +1385,13 @@ static void
 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
+    else
+#endif
+    if (address_p(i0))
        movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1415,8 +1438,7 @@ static jit_word_t
 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 dbopi(lt)
 
@@ -1424,21 +1446,20 @@ static jit_word_t
 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 dbopi(le)
 
 static jit_word_t
 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     ucomisdr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    je(i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    w = je(i0);
+    patch_at(jp_code, _jit->pc.w);
+    return (w);
 }
 dbopi(eq)
 
@@ -1446,8 +1467,7 @@ static jit_word_t
 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 dbopi(ge)
 
@@ -1455,25 +1475,23 @@ static jit_word_t
 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 dbopi(gt)
 
 static jit_word_t
 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     jit_word_t         jz_code;
     ucomisdr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    jz_code = jzs(0);
+    patch_at(jp_code, _jit->pc.w);
+    w = jmpi(i0);
+    patch_at(jz_code, _jit->pc.w);
+    return (w);
 }
 dbopi(ne)
 
@@ -1481,47 +1499,49 @@ static jit_word_t
 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 dbopi(unlt)
 
 static jit_word_t
 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomisdr(r0, r1);
-       jna(i0);
+       w = jna(i0);
     }
-    return (_jit->pc.w);
+    return (w);
 }
 dbopi(unle)
 
 static jit_word_t
 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomisdr(r0, r1);
-       je(i0);
+       w = je(i0);
     }
-    return (_jit->pc.w);
+    return (w);
 }
 dbopi(uneq)
 
 static jit_word_t
 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomisdr(r1, r0);
-       jna(i0);
+       w = jna(i0);
     }
-    return (_jit->pc.w);
+    return (w);
 }
 dbopi(unge)
 
@@ -1529,8 +1549,7 @@ static jit_word_t
 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 dbopi(ungt)
 
@@ -1538,8 +1557,7 @@ static jit_word_t
 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jne(i0);
-    return (_jit->pc.w);
+    return (jne(i0));
 }
 dbopi(ltgt)
 
@@ -1547,8 +1565,7 @@ static jit_word_t
 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jnp(i0);
-    return (_jit->pc.w);
+    return (jnp(i0));
 }
 dbopi(ord)
 
@@ -1556,8 +1573,7 @@ static jit_word_t
 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jp(i0);
-    return (_jit->pc.w);
+    return (jp(i0));
 }
 dbopi(unord)
 #  undef fopi
index eb668b3..5c4515a 100644 (file)
@@ -3,9 +3,10 @@
 #define JIT_INSTR_MAX 42
     0, /* data */
     0, /* live */
-    3, /* align */
+    11,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     3, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     3, /* va_start */
     5, /* va_arg */
     7, /* va_arg_d */
@@ -36,9 +52,9 @@
     5, /* addxi */
     4, /* subr */
     6, /* subi */
-    6, /* subcr */
+    12,        /* subcr */
     6, /* subci */
-    6, /* subxr */
+    12,        /* subxr */
     5, /* subxi */
     8, /* rsbi */
     5, /* mulr */
@@ -52,9 +68,9 @@
     22,        /* divr_u */
     25,        /* divi_u */
     23,        /* qdivr */
-    26,        /* qdivi */
+    28,        /* qdivi */
     24,        /* qdivr_u */
-    27,        /* qdivi_u */
+    29,        /* qdivi_u */
     21,        /* remr */
     24,        /* remi */
     22,        /* remr_u */
     5, /* movi */
     5, /* movnr */
     5, /* movzr */
+    9, /* casr */
+    13,        /* casi */
     11,        /* extr_c */
     11,        /* extr_uc */
     3, /* extr_s */
     3, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    7, /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
     7, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     2, /* callr */
     5, /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* extr_d */
     4, /* extr_f_d */
     10,        /* movr_d */
-    24,        /* movi_d */
+    33,        /* movi_d */
     4, /* ldr_d */
     8, /* ldi_d */
     5, /* ldxr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    7, /* bswapr_us */
-    4, /* bswapr_ui */
-    0, /* bswapr_ul */
-    9, /* casr */
-    13,        /* casi */
-#endif
+    21,        /* clo */
+    17,        /* clz */
+    15,        /* cto */
+    11,        /* ctz */
+#endif /* __X32 */
 
 #if __X64
 #if __CYGWIN__ || _WIN32
 #define JIT_INSTR_MAX 130
     0, /* data */
     0, /* live */
-    6, /* align */
+    27,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     7, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     7, /* va_start */
     7, /* va_arg */
     9, /* va_arg_d */
     10,        /* movi */
     7, /* movnr */
     7, /* movzr */
+    11,        /* casr */
+    21,        /* casi */
     7, /* extr_c */
     7, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     3, /* extr_i */
     3, /* extr_ui */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    6, /* bswapr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     6, /* htonr_ul */
     4, /* ldr_c */
-    15,        /* ldi_c */
+    14,        /* ldi_c */
     4, /* ldr_uc */
-    15,        /* ldi_uc */
+    14,        /* ldi_uc */
     4, /* ldr_s */
-    15,        /* ldi_s */
+    14,        /* ldi_s */
     4, /* ldr_us */
-    15,        /* ldi_us */
+    14,        /* ldi_us */
     3, /* ldr_i */
-    14,        /* ldi_i */
+    13,        /* ldi_i */
     3, /* ldr_ui */
-    14,        /* ldi_ui */
+    13,        /* ldi_ui */
     3, /* ldr_l */
-    14,        /* ldi_l */
+    13,        /* ldi_l */
     5, /* ldxr_c */
     8, /* ldxi_c */
     5, /* ldxr_uc */
     4, /* ldxr_l */
     7, /* ldxi_l */
     6, /* str_c */
-    17,        /* sti_c */
+    16,        /* sti_c */
     4, /* str_s */
-    15,        /* sti_s */
+    14,        /* sti_s */
     3, /* str_i */
-    14,        /* sti_i */
+    13,        /* sti_i */
     3, /* str_l */
-    14,        /* sti_l */
+    13,        /* sti_l */
     7, /* stxr_c */
     7, /* stxi_c */
     5, /* stxr_s */
     10,        /* bxsubi */
     9, /* bxsubr_u */
     10,        /* bxsubi_u */
-    3, /* jmpr */
+    2, /* jmpr */
     5, /* jmpi */
-    3, /* callr */
-    13,        /* calli */
+    2, /* callr */
+    20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* putargr_f */
     0, /* putargi_f */
     10,        /* addr_f */
-    21,        /* addi_f */
+    19,        /* addi_f */
     15,        /* subr_f */
-    21,        /* subi_f */
-    27,        /* rsbi_f */
+    19,        /* subi_f */
+    26,        /* rsbi_f */
     10,        /* mulr_f */
-    21,        /* muli_f */
+    19,        /* muli_f */
     15,        /* divr_f */
-    21,        /* divi_f */
-    15,        /* negr_f */
+    19,        /* divi_f */
+    14,        /* negr_f */
     15,        /* absr_f */
     5, /* sqrtr_f */
     16,        /* ltr_f */
-    31,        /* lti_f */
+    30,        /* lti_f */
     16,        /* ler_f */
-    31,        /* lei_f */
+    30,        /* lei_f */
     18,        /* eqr_f */
-    33,        /* eqi_f */
+    32,        /* eqi_f */
     16,        /* ger_f */
-    31,        /* gei_f */
+    30,        /* gei_f */
     16,        /* gtr_f */
-    31,        /* gti_f */
+    30,        /* gti_f */
     20,        /* ner_f */
-    35,        /* nei_f */
+    34,        /* nei_f */
     16,        /* unltr_f */
-    31,        /* unlti_f */
+    30,        /* unlti_f */
     16,        /* unler_f */
-    31,        /* unlei_f */
+    30,        /* unlei_f */
     16,        /* uneqr_f */
-    31,        /* uneqi_f */
+    30,        /* uneqi_f */
     16,        /* unger_f */
-    31,        /* ungei_f */
+    30,        /* ungei_f */
     16,        /* ungtr_f */
-    31,        /* ungti_f */
+    30,        /* ungti_f */
     16,        /* ltgtr_f */
-    31,        /* ltgti_f */
+    30,        /* ltgti_f */
     16,        /* ordr_f */
-    31,        /* ordi_f */
+    30,        /* ordi_f */
     16,        /* unordr_f */
-    31,        /* unordi_f */
+    30,        /* unordi_f */
     5, /* truncr_f_i */
     5, /* truncr_f_l */
     5, /* extr_f */
     5, /* extr_d_f */
     5, /* movr_f */
-    15,        /* movi_f */
+    18,        /* movi_f */
     5, /* ldr_f */
-    16,        /* ldi_f */
+    15,        /* ldi_f */
     6, /* ldxr_f */
     8, /* ldxi_f */
     5, /* str_f */
-    16,        /* sti_f */
+    15,        /* sti_f */
     6, /* stxr_f */
     9, /* stxi_f */
     10,        /* bltr_f */
-    21,        /* blti_f */
+    19,        /* blti_f */
     10,        /* bler_f */
-    24,        /* blei_f */
+    23,        /* blei_f */
     12,        /* beqr_f */
     27,        /* beqi_f */
     10,        /* bger_f */
-    25,        /* bgei_f */
+    24,        /* bgei_f */
     10,        /* bgtr_f */
-    25,        /* bgti_f */
+    24,        /* bgti_f */
     13,        /* bner_f */
-    28,        /* bnei_f */
+    27,        /* bnei_f */
     10,        /* bunltr_f */
-    25,        /* bunlti_f */
+    24,        /* bunlti_f */
     10,        /* bunler_f */
-    25,        /* bunlei_f */
+    24,        /* bunlei_f */
     10,        /* buneqr_f */
-    25,        /* buneqi_f */
+    24,        /* buneqi_f */
     10,        /* bunger_f */
-    25,        /* bungei_f */
+    24,        /* bungei_f */
     10,        /* bungtr_f */
-    25,        /* bungti_f */
+    24,        /* bungti_f */
     10,        /* bltgtr_f */
-    25,        /* bltgti_f */
+    24,        /* bltgti_f */
     10,        /* bordr_f */
-    25,        /* bordi_f */
+    24,        /* bordi_f */
     10,        /* bunordr_f */
-    25,        /* bunordi_f */
+    24,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     25,        /* muli_d */
     15,        /* divr_d */
     25,        /* divi_d */
-    22,        /* negr_d */
+    21,        /* negr_d */
     16,        /* absr_d */
     5, /* sqrtr_d */
     17,        /* ltr_d */
     5, /* extr_d */
     5, /* extr_f_d */
     5, /* movr_d */
-    15,        /* movi_d */
+    29,        /* movi_d */
     5, /* ldr_d */
-    16,        /* ldi_d */
+    15,        /* ldi_d */
     6, /* ldxr_d */
     8, /* ldxi_d */
     5, /* str_d */
-    16,        /* sti_d */
+    15,        /* sti_d */
     6, /* stxr_d */
     9, /* stxi_d */
     11,        /* bltr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    9, /* bswapr_us */
-    6, /* bswapr_ui */
-    6, /* bswapr_ul */
-    0, /* casr */
-    0, /* casi */
+    27,        /* clo */
+    21,        /* clz */
+    20,        /* cto */
+    14,        /* ctz */
 #else
 
 #  if __X64_32
-#define JIT_INSTR_MAX 108
+#define JIT_INSTR_MAX 105
     0, /* data */
     0, /* live */
-    3, /* align */
+    7, /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     3, /* label */
-    108,       /* prolog */
+    105,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    41,        /* va_start */
-    45,        /* va_arg */
-    54,        /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    33,        /* va_start */
+    43,        /* va_arg */
+    45,        /* va_arg_d */
     0, /* va_end */
     5, /* addr */
     7, /* addi */
     6, /* movi */
     7, /* movnr */
     7, /* movzr */
+    11,        /* casr */
+    16,        /* casi */
     7, /* extr_c */
     7, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    0, /* bswapr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     0, /* htonr_ul */
     8, /* sti_i */
     0, /* str_l */
     0, /* sti_l */
-    12,        /* stxr_c */
+    11,        /* stxr_c */
     7, /* stxi_c */
-    10,        /* stxr_s */
+    9, /* stxr_s */
     7, /* stxi_s */
-    9, /* stxr_i */
+    8, /* stxr_i */
     6, /* stxi_i */
     0, /* stxr_l */
     0, /* stxi_l */
     10,        /* bxsubi_u */
     2, /* jmpr */
     5, /* jmpi */
-    3, /* callr */
+    2, /* callr */
     9, /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* putargr_f */
     0, /* putargi_f */
     10,        /* addr_f */
-    21,        /* addi_f */
+    20,        /* addi_f */
     15,        /* subr_f */
-    21,        /* subi_f */
-    26,        /* rsbi_f */
+    20,        /* subi_f */
+    25,        /* rsbi_f */
     10,        /* mulr_f */
-    21,        /* muli_f */
+    20,        /* muli_f */
     15,        /* divr_f */
-    21,        /* divi_f */
+    20,        /* divi_f */
     15,        /* negr_f */
     15,        /* absr_f */
     5, /* sqrtr_f */
     11,        /* movi_f */
     6, /* ldr_f */
     10,        /* ldi_f */
-    11,        /* ldxr_f */
+    10,        /* ldxr_f */
     9, /* ldxi_f */
     6, /* str_f */
     10,        /* sti_f */
-    11,        /* stxr_f */
+    10,        /* stxr_f */
     9, /* stxi_f */
     10,        /* bltr_f */
-    21,        /* blti_f */
+    20,        /* blti_f */
     10,        /* bler_f */
-    21,        /* blei_f */
+    20,        /* blei_f */
     12,        /* beqr_f */
     23,        /* beqi_f */
     10,        /* bger_f */
-    21,        /* bgei_f */
+    20,        /* bgei_f */
     10,        /* bgtr_f */
-    21,        /* bgti_f */
+    20,        /* bgti_f */
     13,        /* bner_f */
-    24,        /* bnei_f */
+    23,        /* bnei_f */
     10,        /* bunltr_f */
-    21,        /* bunlti_f */
+    20,        /* bunlti_f */
     10,        /* bunler_f */
-    21,        /* bunlei_f */
+    20,        /* bunlei_f */
     10,        /* buneqr_f */
-    21,        /* buneqi_f */
+    20,        /* buneqi_f */
     10,        /* bunger_f */
-    21,        /* bungei_f */
+    20,        /* bungei_f */
     10,        /* bungtr_f */
-    21,        /* bungti_f */
+    20,        /* bungti_f */
     10,        /* bltgtr_f */
-    21,        /* bltgti_f */
+    20,        /* bltgti_f */
     10,        /* bordr_f */
-    21,        /* bordi_f */
+    20,        /* bordi_f */
     10,        /* bunordr_f */
-    21,        /* bunordi_f */
+    20,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* putargr_d */
     0, /* putargi_d */
     10,        /* addr_d */
-    33,        /* addi_d */
+    29,        /* addi_d */
     15,        /* subr_d */
-    33,        /* subi_d */
-    38,        /* rsbi_d */
+    29,        /* subi_d */
+    34,        /* rsbi_d */
     10,        /* mulr_d */
-    33,        /* muli_d */
+    29,        /* muli_d */
     15,        /* divr_d */
-    33,        /* divi_d */
+    29,        /* divi_d */
     22,        /* negr_d */
     16,        /* absr_d */
     5, /* sqrtr_d */
     23,        /* movi_d */
     6, /* ldr_d */
     10,        /* ldi_d */
-    11,        /* ldxr_d */
+    10,        /* ldxr_d */
     9, /* ldxi_d */
     6, /* str_d */
     10,        /* sti_d */
-    11,        /* stxr_d */
+    10,        /* stxr_d */
     9, /* stxi_d */
     11,        /* bltr_d */
-    34,        /* blti_d */
+    30,        /* blti_d */
     11,        /* bler_d */
-    34,        /* blei_d */
+    30,        /* blei_d */
     13,        /* beqr_d */
     36,        /* beqi_d */
     11,        /* bger_d */
-    34,        /* bgei_d */
+    30,        /* bgei_d */
     11,        /* bgtr_d */
-    34,        /* bgti_d */
+    30,        /* bgti_d */
     14,        /* bner_d */
-    37,        /* bnei_d */
+    33,        /* bnei_d */
     11,        /* bunltr_d */
-    34,        /* bunlti_d */
+    30,        /* bunlti_d */
     11,        /* bunler_d */
-    34,        /* bunlei_d */
+    30,        /* bunlei_d */
     11,        /* buneqr_d */
-    34,        /* buneqi_d */
+    30,        /* buneqi_d */
     11,        /* bunger_d */
-    34,        /* bungei_d */
+    30,        /* bungei_d */
     11,        /* bungtr_d */
-    34,        /* bungti_d */
+    30,        /* bungti_d */
     11,        /* bltgtr_d */
-    34,        /* bltgti_d */
+    30,        /* bltgti_d */
     11,        /* bordr_d */
-    34,        /* bordi_d */
+    30,        /* bordi_d */
     11,        /* bunordr_d */
-    34,        /* bunordi_d */
+    30,        /* bunordi_d */
     0, /* pushargr_d */
     0, /* pushargi_d */
     0, /* retr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    9, /* bswapr_us */
-    6, /* bswapr_ui */
-    0, /* bswapr_ul */
-    0, /* casr */
-    0, /* casi */
+    11,        /* clo */
+    5, /* clz */
+    11,        /* cto */
+    5, /* ctz */
+#else
 
-#  else
-#define JIT_INSTR_MAX 115
+#define JIT_INSTR_MAX 112
     0, /* data */
     0, /* live */
-    6, /* align */
+    27,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     7, /* label */
-    115,       /* prolog */
+    112,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     38,        /* va_start */
     41,        /* va_arg */
     48,        /* va_arg_d */
     10,        /* movi */
     7, /* movnr */
     7, /* movzr */
+    11,        /* casr */
+    16,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     3, /* extr_i */
     3, /* extr_ui */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    6, /* bswapr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     6, /* htonr_ul */
     9, /* bxsubr_u */
     10,        /* bxsubi_u */
     2, /* jmpr */
-    13,        /* jmpi */
-    3, /* callr */
-    12,        /* calli */
+    5, /* jmpi */
+    2, /* callr */
+    13,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     10,        /* bltr_f */
     20,        /* blti_f */
     10,        /* bler_f */
-    25,        /* blei_f */
+    22,        /* blei_f */
     12,        /* beqr_f */
-    27,        /* beqi_f */
+    22,        /* beqi_f */
     10,        /* bger_f */
-    25,        /* bgei_f */
+    22,        /* bgei_f */
     10,        /* bgtr_f */
-    25,        /* bgti_f */
+    22,        /* bgti_f */
     13,        /* bner_f */
-    28,        /* bnei_f */
+    25,        /* bnei_f */
     10,        /* bunltr_f */
-    25,        /* bunlti_f */
+    23,        /* bunlti_f */
     10,        /* bunler_f */
-    25,        /* bunlei_f */
+    23,        /* bunlei_f */
     10,        /* buneqr_f */
-    25,        /* buneqi_f */
+    23,        /* buneqi_f */
     10,        /* bunger_f */
-    25,        /* bungei_f */
+    23,        /* bungei_f */
     10,        /* bungtr_f */
-    25,        /* bungti_f */
+    22,        /* bungti_f */
     10,        /* bltgtr_f */
-    25,        /* bltgti_f */
+    22,        /* bltgti_f */
     10,        /* bordr_f */
-    25,        /* bordi_f */
+    22,        /* bordi_f */
     10,        /* bunordr_f */
-    25,        /* bunordi_f */
+    22,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    9, /* bswapr_us */
-    6, /* bswapr_ui */
-    6, /* bswapr_ul */
-    11,        /* casr */
-    16,        /* casi */
+    11,        /* clo */
+    5, /* clz */
+    11,        /* cto */
+    5, /* ctz */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
index 227b1a2..3de0214 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -408,14 +408,14 @@ _x87_b##name##i_##type(jit_state_t *_jit,                         \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
 {                                                                      \
-    jit_word_t         word;                                           \
+    jit_word_t         w;                                              \
     jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
                                          jit_class_nospill);           \
     assert(jit_x87_reg_p(reg));                                                \
     x87_movi_##type(rn(reg), i1);                                      \
-    word = x87_b##name##r_##type(i0, r0, rn(reg));                     \
+    w = x87_b##name##r_##type(i0, r0, rn(reg));                                \
     jit_unget_reg(reg);                                                        \
-    return (word);                                                     \
+    return (w);                                                                \
 }
 #  define fopi(name)                   fpr_opi(name, f, 32)
 #  define fbopi(name)                  fpr_bopi(name, f, 32)
@@ -662,6 +662,7 @@ _x87_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
 #if defined(sun)
     /* for the sake of passing test cases in x87 mode, otherwise only sse
      * is supported */
@@ -692,6 +693,7 @@ _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     fldr(r1);
     fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
     ldxi(r0, _RBP_REGNO, CVT_OFFSET);
@@ -701,6 +703,7 @@ _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     stxi(CVT_OFFSET, _RBP_REGNO, r1);
 #  if __X32
     fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
@@ -771,8 +774,7 @@ _x87jcc(jit_state_t *_jit, jit_int32_t code,
        fldr(r0);
        fucomipr(r1 + 1);
     }
-    jcc(code, i0);
-    return (_jit->pc.w);
+    return (jcc(code, i0));
 }
 
 static jit_word_t
@@ -788,8 +790,7 @@ _x87jcc2(jit_state_t *_jit, jit_int32_t code,
        fldr(f0);
        fucomipr(f1 + 1);
     }
-    jcc(code, i0);
-    return (_jit->pc.w);
+    return (jcc(code, i0));
 }
 
 fopi(lt)
@@ -847,6 +848,7 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
        fldln2();
     else {
        if (_jitc->no_data) {
+           CHECK_CVT_OFFSET();
            reg = jit_get_reg(jit_class_gpr);
            movi(rn(reg), data.i);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
@@ -1038,6 +1040,7 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
        fldln2();
     else {
        if (_jitc->no_data) {
+           CHECK_CVT_OFFSET();
            reg = jit_get_reg(jit_class_gpr);
 #if __X32 || __X64_32
            movi(rn(reg), data.ii[0]);
@@ -1082,10 +1085,9 @@ _x87_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        fldr(f1);
        fucomipr(f2 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1115,10 +1117,9 @@ _x87_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        fldr(f1);
        fucomipr(f2 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1283,6 +1284,7 @@ dbopi(le)
 static jit_word_t
 _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t                 w;
     jit_int32_t                        f0, f1;
     jit_word_t                 jp_code;
     if (r1 == _ST0_REGNO)      f0 = r1, f1 = r0;
@@ -1293,11 +1295,10 @@ _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        fldr(f0);
        fucomipr(f1 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
-    jcc(X86_CC_E, i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jpes(0);
+    w = jcc(X86_CC_E, i0);
+    patch_at(jp_code, _jit->pc.w);
+    return (w);
 }
 dbopi(eq)
 dbopi(ge)
@@ -1306,6 +1307,7 @@ dbopi(gt)
 static jit_word_t
 _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t                 w;
     jit_int32_t                        f0, f1;
     jit_word_t                 jp_code;
     jit_word_t                 jz_code;
@@ -1317,14 +1319,12 @@ _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        fldr(f0);
        fucomipr(f1 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jpes(0);
+    jz_code = jzs(0);
+    patch_at(jp_code, _jit->pc.w);
+    w = jmpi(i0);
+    patch_at(jz_code, _jit->pc.w);
+    return (w);
 }
 dbopi(ne)
 dbopi(unlt)
index 6472e56..b409457 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
 #include <lightning/jit_private.h>
 
 #if __X32
+#  define CAN_RIP_ADDRESS              0
+#  define address_p(i0)                        1
 #  define jit_arg_reg_p(i)             0
 #  define jit_arg_f_reg_p(i)           0
-#  define stack_framesize              20
-#  define stack_adjust                 12
-#  define CVT_OFFSET                   -12
+/* callee save                        + 16 byte align
+ * align16(%ebp + %rbx + %rsi + %rdi) + (16 - 4)  */
+#  define stack_framesize              28
 #  define REAL_WORDSIZE                        4
 #  define va_gp_increment              4
 #  define va_fp_increment              8
 #else
+#  if _WIN32 || __X64_32
+#    define CAN_RIP_ADDRESS            0
+#  else
+#    define CAN_RIP_ADDRESS            1
+#  endif
+#  if __X64_32
+#    define address_p(i0)              ((jit_word_t)(i0) >= 0)
+#  else
+#    define address_p(i0)              can_sign_extend_int_p(i0)
+#  endif
 #  if __CYGWIN__ || _WIN32
 #    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 4)
 #    define jit_arg_f_reg_p(i)         jit_arg_reg_p(i)
+/* callee save                                                + 16 byte align
+ * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */
 #    define stack_framesize            152
 #    define va_fp_increment            8
 #  else
 #    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 6)
 #    define jit_arg_f_reg_p(i)         ((i) >= 0 && (i) < 8)
+/* callee save                                      + 16 byte align
+ * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */
 #    define stack_framesize            56
 #    define first_gp_argument          rdi
 #    define first_gp_offset            offsetof(jit_va_list_t, rdi)
 #    define first_fp_from_offset(fp)   (((fp) - va_gp_max_offset) / 16)
 #  endif
 #  define va_gp_increment              8
-#  define stack_adjust                 8
-#  define CVT_OFFSET                   -8
 #  define REAL_WORDSIZE                        8
 #endif
+#define CVT_OFFSET                     _jitc->function->cvt_offset
+
+#define CHECK_CVT_OFFSET()                                             \
+    do {                                                               \
+       if (!_jitc->function->cvt_offset) {                             \
+           _jitc->again = 1;                                           \
+           _jitc->function->cvt_offset =                               \
+                jit_allocai(sizeof(jit_float64_t));                    \
+       }                                                               \
+    } while (0)
 
 /*
  * Types
@@ -99,6 +123,8 @@ typedef struct jit_va_list {
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define sse_from_x87_f(r0, r1)         _sse_from_x87_f(_jit, r0, r1)
@@ -227,6 +253,22 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+#if __X32
+    _RBX, _RSI, _RDI,
+#elif (__CYGWIN__ || _WIN32)
+    _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15,
+#else
+    _R15, _R14, _R13, _R12, _RBX,
+#endif
+};
+
+#if __X64 && (__CYGWIN__ || _WIN32)
+static jit_int32_t fregs[] = {
+    _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
+};
+#endif
+
 /*
  * Implementation
  */
@@ -234,6 +276,45 @@ void
 jit_get_cpu(void)
 {
     union {
+       /* eax=7 and ecx=0 */
+       struct {
+           jit_uword_t fsgsbase        : 1;
+           jit_uword_t IA32_TSC_ADJUST : 1;
+           jit_uword_t sgx             : 1;
+           jit_uword_t bmi1            : 1;
+           jit_uword_t hle             : 1;
+           jit_uword_t avx2            : 1;
+           jit_uword_t FDP_EXCPTN_ONLY : 1;
+           jit_uword_t smep            : 1;
+           jit_uword_t bmi2            : 1;
+           jit_uword_t erms            : 1;
+           jit_uword_t invpcid         : 1;
+           jit_uword_t rtm             : 1;
+           jit_uword_t rdt_m_pqm       : 1;
+           jit_uword_t dep_FPU_CS_DS   : 1;
+           jit_uword_t mpx             : 1;
+           jit_uword_t rdt_a_pqe       : 1;
+           jit_uword_t avx512_f        : 1;
+           jit_uword_t avx512_dq       : 1;
+           jit_uword_t rdseed          : 1;
+           jit_uword_t adx             : 1;
+           jit_uword_t smap            : 1;
+           jit_uword_t avx512_ifma     : 1;
+           jit_uword_t __reserved0     : 1;
+           jit_uword_t clflushopt      : 1;
+           jit_uword_t clwb            : 1;
+           jit_uword_t pt              : 1;
+           jit_uword_t avx512_pf       : 1;
+           jit_uword_t avx512_er       : 1;
+           jit_uword_t avx512_cd       : 1;
+           jit_uword_t sha             : 1;
+           jit_uword_t avx512_bw       : 1;
+           jit_uword_t avx512_vl       : 1;
+       } bits;
+       jit_uword_t     cpuid;
+    } ebx;
+    union {
+       /* eax=1 */
        struct {
            jit_uint32_t sse3           : 1;
            jit_uint32_t pclmulqdq      : 1;
@@ -271,6 +352,7 @@ jit_get_cpu(void)
        jit_uword_t     cpuid;
     } ecx;
     union {
+       /* eax=1 */
        struct {
            jit_uint32_t fpu            : 1;
            jit_uint32_t vme            : 1;
@@ -310,7 +392,7 @@ jit_get_cpu(void)
 #if __X32
     int                        ac, flags;
 #endif
-    jit_uword_t                eax, ebx;
+    jit_uword_t                eax;
 
 #if __X32
     /* adapted from glibc __sysconf */
@@ -339,7 +421,7 @@ jit_get_cpu(void)
 #else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #endif
-                     : "=a" (eax), "=r" (ebx),
+                     : "=a" (eax), "=r" (ebx.cpuid),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (1));
 
@@ -361,6 +443,15 @@ jit_get_cpu(void)
     jit_cpu.aes                = ecx.bits.aes;
     jit_cpu.avx                = ecx.bits.avx;
 
+    /* query %eax = 7 and ecx = 0 function */
+#if __X64
+    __asm__ volatile ("cpuid"
+                     : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx)
+                     : "a" (7), "c" (0));
+#endif
+    jit_cpu.adx = ebx.bits.adx;
+
+
     /* query %eax = 0x80000001 function */
 #if __X64
 #  if __X64_32
@@ -368,10 +459,11 @@ jit_get_cpu(void)
 #  else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #  endif
-                     : "=a" (eax), "=r" (ebx),
+                     : "=a" (eax), "=r" (ebx.cpuid),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (0x80000001));
-    jit_cpu.lahf       = ecx.cpuid & 1;
+    jit_cpu.lahf       = !!(ecx.cpuid & 1);
+    jit_cpu.abm                = !!(ecx.cpuid & 32);
 #endif
 }
 
@@ -414,11 +506,15 @@ _jit_prolog(jit_state_t *_jit)
        _jitc->functions.length += 16;
     }
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
-    _jitc->function->self.size = stack_framesize;
+    /* One extra stack slot for the implicitly saved return address */
+    _jitc->function->self.size = stack_framesize + REAL_WORDSIZE;
     _jitc->function->self.argi = _jitc->function->self.argf =
        _jitc->function->self.aoff = _jitc->function->self.alen = 0;
-    /* sse/x87 conversion */
-    _jitc->function->self.aoff = CVT_OFFSET;
+    _jitc->function->cvt_offset = 0;
+#if __X64 && (__CYGWIN__ || _WIN32)
+    /* force framepointer */
+    jit_check_frame();
+#endif
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
@@ -444,6 +540,13 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
+#if __X32
+    /* Stack is 4-byte aligned but jit functions keep it 8-byte aligned.
+     * Called functions have a 16-byte aligned stack. */
+    if (!_jitc->function->self.aoff)
+       _jitc->function->self.aoff = -4;
+#endif
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -500,22 +603,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
-{
-    jit_inc_synth_w(retr, u);
-    /* movr(%ret, %ret) would be optimized out */
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    /* explicitly tell it is live */
-    jit_live(JIT_RET);
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
+{
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -575,7 +674,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -585,6 +684,7 @@ void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        /* Remember that a varargs function call is being constructed. */
@@ -629,12 +729,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
 #if __X64
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
@@ -647,8 +750,9 @@ _jit_arg(jit_state_t *_jit)
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -676,6 +780,7 @@ _jit_arg_f(jit_state_t *_jit)
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -705,6 +810,7 @@ _jit_arg_d(jit_state_t *_jit)
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -715,63 +821,75 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_c(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_c(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_uc(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_uc(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_s(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_s(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_us(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_us(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w)) {
@@ -783,7 +901,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
      }
     else
 #endif
-       jit_ldxi_i(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_i(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -791,57 +912,66 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, _RBP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, _RBP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
 #endif
-       jit_stxi(v->u.w, _RBP, u);
+    {
+       jit_node_t      *node = jit_stxi(v->u.w, _RBP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else
 #endif
     {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, _RBP, regno);
+       node = jit_stxi(v->u.w, _RBP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -857,7 +987,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, _XMM0 - v->u.w);
     else
 #endif
-       jit_ldxi_f(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_f(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -867,11 +1000,14 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movr_f(_XMM0 - v->u.w, u);
     else
 #endif
-       jit_stxi_f(v->u.w, _RBP, u);
+    {
+       jit_node_t      *node = jit_stxi_f(v->u.w, _RBP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -882,14 +1018,16 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(_XMM0 - v->u.w, u);
     else
 #endif
     {
-       regno = jit_get_reg(jit_class_gpr);
+       jit_node_t      *node;
+       regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, _RBP, regno);
+       node = jit_stxi_f(v->u.w, _RBP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -905,7 +1043,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(u, _XMM0 - v->u.w);
     else
 #endif
-       jit_ldxi_d(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_d(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -915,11 +1056,14 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movr_d(_XMM0 - v->u.w, u);
     else
 #endif
-       jit_stxi_d(v->u.w, _RBP, u);
+    {
+       jit_node_t      *node = jit_stxi_d(v->u.w, _RBP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
@@ -930,24 +1074,26 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movi_d(_XMM0 - v->u.w, u);
     else
 #endif
     {
-       regno = jit_get_reg(jit_class_gpr);
+       jit_node_t      *node;
+       regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, _RBP, regno);
+       node = jit_stxi_d(v->u.w, _RBP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -964,16 +1110,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     {
        jit_stxi(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -994,6 +1141,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        jit_stxi(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -1028,6 +1176,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     {
        jit_stxi_f(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -1066,6 +1215,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_stxi_f(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -1100,6 +1250,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     {
        jit_stxi_d(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -1138,6 +1289,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_d(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += sizeof(jit_float64_t);
        jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
@@ -1171,6 +1323,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
     jit_int32_t                 reg;
     jit_node_t         *call;
     assert(_jitc->function);
+    jit_check_frame();
     reg = r0;
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
@@ -1203,32 +1356,26 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
-#if __X64
-    jit_int32_t                reg;
-#endif
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
 #if __X64
-    /* FIXME preventing %rax allocation is good enough, but for consistency
-     * it should automatically detect %rax is dead, in case it has run out
-     * registers, and not save/restore it, what would be wrong if using the
-     * the return value, otherwise, just a needless noop */
-    /* >> prevent %rax from being allocated as the function pointer */
-    jit_regset_setbit(&_jitc->regarg, _RAX);
-    reg = jit_get_reg(jit_class_gpr);
-    node = jit_movi(reg, (jit_word_t)i0);
-    jit_finishr(reg);
-    jit_unget_reg(reg);
-    /* << prevent %rax from being allocated as the function pointer */
-    jit_regset_clrbit(&_jitc->regarg, _RAX);
-#else
+#  if !(__CYGWIN__ || _WIN32)
+    if (_jitc->function->call.call & jit_call_varargs) {
+       if (_jitc->function->call.argf)
+           jit_movi(_RAX, _jitc->function->call.argf);
+       else
+           jit_movi(_RAX, 0);
+       jit_live(_RAX);
+    }
+#  endif
+#endif
     node = jit_calli(i0);
     node->v.w = _jitc->function->call.argi;
     node->w.w = _jitc->function->call.argf;
-#endif
     _jitc->function->call.argi = _jitc->function->call.argf =
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
@@ -1333,6 +1480,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1598,7 +1746,10 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
-           case jit_code_note:         case jit_code_name:
+            case jit_code_skip:
+                nop(node->u.w);
+                break;
+            case jit_code_note:                case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
@@ -1654,6 +1805,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1695,7 +1850,14 @@ _emit_code(jit_state_t *_jit)
                    else {
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
-                       word = movi_p(rn(node->u.w), node->v.w);
+#if CAN_RIP_ADDRESS
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if ((jit_int32_t)word == word)
+                           word = movi(rn(node->u.w), _jit->pc.w);
+                       else
+#endif
+                           word = movi_p(rn(node->u.w), node->v.w);
                        patch(word, node);
                    }
                }
@@ -2017,6 +2179,7 @@ _emit_code(jit_state_t *_jit)
                case_bff(unord, _d);
                case_bfw(unord, _d, 64);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -2027,14 +2190,24 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+#if __X64
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if ((jit_int32_t)word == word)
+                           word = jmpi(_jit->pc.w);
+                       else
+#endif
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -2045,22 +2218,34 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+#if __X64
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if ((jit_int32_t)word == word)
+                           word = calli(_jit->pc.w);
+                       else
+#endif
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    calli(node->u.w);
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
@@ -2076,10 +2261,29 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* The real stack framesize is not in the jit_function_t;
+                    * if it were, it would also need to not be undone. */
+                   /* cvt_offset must also not be undone */
+                   undo.func.cvt_offset = _jitc->function->cvt_offset;
+                   /* this will be recomputed but undo anyway to have it
+                    * better self documented.*/
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                if (node->link &&
@@ -2103,11 +2307,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -2117,10 +2333,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -2182,7 +2414,7 @@ _emit_code(jit_state_t *_jit)
     for (offset = 0; offset < _jitc->patches.offset; offset++) {
        node = _jitc->patches.ptr[offset].node;
        word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
-       patch_at(node, _jitc->patches.ptr[offset].inst, word);
+       patch_at(_jitc->patches.ptr[offset].inst, word);
     }
 
     jit_flush(_jit->code.ptr, _jit->pc.uc);
@@ -2231,6 +2463,26 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
        sse_stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    /* Save stack pointer in first slot */
+    _jitc->framesize = REAL_WORDSIZE;
+    for (reg = 0; reg < jit_size(iregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += REAL_WORDSIZE;
+
+#if __X64 && (__CYGWIN__ || _WIN32)
+    for (reg = 0; reg < jit_size(fregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+#endif
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+    _jitc->framesize += 16 - REAL_WORDSIZE;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
@@ -2256,6 +2508,7 @@ _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 static void
 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
     sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
 }
@@ -2263,6 +2516,7 @@ _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
     sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
@@ -2270,6 +2524,7 @@ _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
     x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
 }
@@ -2277,6 +2532,7 @@ _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
     x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
index 49244b5..b0b0ef7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -227,8 +227,25 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
        for (regno = 0; regno < _jitc->reglen; regno++) {
            if ((jit_class(_rvs[regno].spec) & spec) == spec &&
                !jit_regset_tstbit(&_jitc->regarg, regno) &&
-               !jit_regset_tstbit(&_jitc->reglive, regno))
+               !jit_regset_tstbit(&_jitc->reglive, regno)) {
+               if (jit_regset_tstbit(&_jitc->regmask, regno)) {
+                   /* Search further, attempting to find a register that
+                    * is known to be free, not just one in an unknown state. */
+                   jit_int32_t regfree;
+
+                   for (regfree = regno + 1;
+                        regfree < _jitc->reglen; regfree++) {
+                       if ((jit_class(_rvs[regfree].spec) & spec) == spec &&
+                           !jit_regset_tstbit(&_jitc->regarg, regfree) &&
+                           !jit_regset_tstbit(&_jitc->reglive, regfree) &&
+                           !jit_regset_tstbit(&_jitc->regmask, regfree)) {
+                           regno = regfree;
+                           break;
+                       }
+                   }
+               }
                goto regarg;
+           }
        }
 
        /* search for a register matching spec that is not an argument
@@ -874,6 +891,7 @@ jit_new_state(void)
     jit_regset_new(&_jitc->regsav);
     jit_regset_new(&_jitc->reglive);
     jit_regset_new(&_jitc->regmask);
+    jit_regset_new(&_jitc->explive);
 
     jit_init();
 
@@ -1335,14 +1353,36 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
            mask = 0;
            break;
        case jit_code_live:     case jit_code_va_end:
-       case jit_code_retr:     case jit_code_retr_f:   case jit_code_retr_d:
-       case jit_code_pushargr: case jit_code_pushargr_f:
+       case jit_code_retr_c:   case jit_code_retr_uc:
+       case jit_code_retr_s:   case jit_code_retr_us:
+       case jit_code_retr_i:   case jit_code_retr_ui:
+       case jit_code_retr_l:
+       case jit_code_retr_f:   case jit_code_retr_d:
+       case jit_code_pushargr_c:
+       case jit_code_pushargr_uc:
+       case jit_code_pushargr_s:
+       case jit_code_pushargr_us:
+       case jit_code_pushargr_i:
+       case jit_code_pushargr_ui:
+       case jit_code_pushargr_l:
+       case jit_code_pushargr_f:
        case jit_code_pushargr_d:
        case jit_code_finishr:  /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_reg;
            break;
-       case jit_code_align:    case jit_code_reti:     case jit_code_pushargi:
-       case jit_code_finishi:  /* synthesized will set jit_cc_a0_jmp */
+       case jit_code_align:    case jit_code_skip:
+       case jit_code_reti_c:   case jit_code_reti_uc:
+       case jit_code_reti_s:   case jit_code_reti_us:
+       case jit_code_reti_i:   case jit_code_reti_ui:
+       case jit_code_reti_l:
+       case jit_code_pushargi_c:
+       case jit_code_pushargi_uc:
+       case jit_code_pushargi_s:
+       case jit_code_pushargi_us:
+       case jit_code_pushargi_i:
+       case jit_code_pushargi_ui:
+       case jit_code_pushargi_l:
+        case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_int;
            break;
        case jit_code_reti_f:   case jit_code_pushargi_f:
@@ -1354,7 +1394,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_allocai:
            mask = jit_cc_a0_int|jit_cc_a1_int;
            break;
-       case jit_code_arg:      case jit_code_arg_f:    case jit_code_arg_d:
+       case jit_code_arg_c:    case jit_code_arg_s:
+       case jit_code_arg_i:    case jit_code_arg_l:
+       case jit_code_arg_f:    case jit_code_arg_d:
            mask = jit_cc_a0_int|jit_cc_a0_arg;
            break;
        case jit_code_calli:    case jit_code_jmpi:
@@ -1378,11 +1420,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_getarg_f: case jit_code_getarg_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg;
            break;
-       case jit_code_putargr:  case jit_code_putargr_f:
-       case jit_code_putargr_d:
+       case jit_code_putargr_c:case jit_code_putargr_uc:
+       case jit_code_putargr_s:case jit_code_putargr_us:
+       case jit_code_putargr_i:case jit_code_putargr_ui:
+       case jit_code_putargr_l:
+       case jit_code_putargr_f:case jit_code_putargr_d:
            mask = jit_cc_a0_reg|jit_cc_a1_arg;
            break;
-       case jit_code_putargi:
+       case jit_code_putargi_c:case jit_code_putargi_uc:
+       case jit_code_putargi_s:case jit_code_putargi_us:
+       case jit_code_putargi_i:case jit_code_putargi_ui:
+       case jit_code_putargi_l:
            mask = jit_cc_a0_int|jit_cc_a1_arg;
            break;
        case jit_code_putargi_f:
@@ -1422,6 +1470,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_negr_d:   case jit_code_absr_d:   case jit_code_sqrtr_d:
        case jit_code_movr_d:   case jit_code_extr_d:   case jit_code_extr_f_d:
        case jit_code_ldr_d:
+       case jit_code_clor:     case jit_code_clzr:
+       case jit_code_ctor:     case jit_code_ctzr:
        case jit_code_movr_w_f: case jit_code_movr_f_w:
        case jit_code_movr_w_d: case jit_code_movr_d_w:
        case jit_code_va_arg:   case jit_code_va_arg_d:
@@ -1648,8 +1698,14 @@ _do_setup(jit_state_t *_jit)
      * at the start of a basic block */
     for (offset = 0; offset < _jitc->blocks.offset; offset++) {
        block = _jitc->blocks.ptr + offset;
-       if (!block->label || block->label->code == jit_code_epilog)
+       if (!block->label)
            continue;
+       if (block->label->code == jit_code_epilog) {
+           jit_regset_setbit(&block->reglive, JIT_RET);
+           jit_regset_setbit(&block->reglive, JIT_FRET);
+           jit_regset_com(&block->regmask, &block->reglive);
+           continue;
+       }
        jit_setup(block);
     }
 }
@@ -1750,7 +1806,7 @@ _check_block_again(jit_state_t *_jit)
     }
     while (todo);
 
-    return (1);
+    return (todo);
 }
 
 static void
@@ -1781,6 +1837,7 @@ _jit_optimize(jit_state_t *_jit)
     jit_node_t         *node;
     jit_block_t                *block;
     jit_word_t          offset;
+    jit_regset_t        regmask;
 
     todo = 0;
     _jitc->function = NULL;
@@ -1795,15 +1852,31 @@ _jit_optimize(jit_state_t *_jit)
     if (simplify())
        todo = 1;
 
-    /* Figure out labels that are only reached with a jump
-     * and is required to do a simple redundant_store removal
-     * on jit_beqi below */
+    jit_regset_set_ui(&regmask, 0);
+    for (offset = 0; offset < _jitc->reglen; offset++) {
+       if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) &&
+           (jit_class(_rvs[offset].spec) & jit_class_sav) == jit_class_sav)
+           jit_regset_setbit(&regmask, offset);
+    }
+
+    /* Figure out labels that are only reached with a jump */
     jump = 1;
     for (node = _jitc->head; node; node = node->next) {
        switch (node->code) {
            case jit_code_label:
-               if (!jump)
+               if (!jump) {
                    node->flag |= jit_flag_head;
+                   if (!node->link) {
+                       /* Block is dead code or only reachable through
+                        * indirect jumps. In that case, we must assume
+                        * all callee save registers are live. */
+                       block = _jitc->blocks.ptr + node->v.w;
+                       jit_regset_ior(&block->reglive,
+                                      &block->reglive, &regmask);
+                       /* Cleanup regmask */
+                       block_update_set(block, block);
+                   }
+               }
                break;
            case jit_code_jmpi:         case jit_code_jmpr:
            case jit_code_epilog:
@@ -1932,6 +2005,10 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node)
        case jit_code_label:    case jit_code_prolog:   case jit_code_epilog:
            block = _jitc->blocks.ptr + node->v.w;
            jit_regset_set(&_jitc->reglive, &block->reglive);
+           jit_regset_set_ui(&_jitc->explive, 0);
+           break;
+       case jit_code_live:
+           jit_regset_setbit(&_jitc->explive, node->u.w);
            break;
        case jit_code_callr:
            value = jit_regno(node->u.w);
@@ -2043,6 +2120,19 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
        else
            jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
     }
+    /* Prevent incorrect detection of running out of registers
+     * when the jump will need to be patched and all registers
+     * have been used in the current block. */
+    if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) {
+       jit_node_t      *label = node->u.n;
+       jit_block_t     *block = _jitc->blocks.ptr + label->v.w;
+       jit_regset_set(&_jitc->reglive, &block->reglive);
+       jit_regset_set(&_jitc->regmask, &block->regmask);
+       if (jit_regset_set_p(&_jitc->explive)) {
+           jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive);
+           jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive);
+       }
+    }
 }
 
 void
@@ -2244,7 +2334,7 @@ _jit_emit(jit_state_t *_jit)
 #else
     if (!_jit->user_code) {
        mmap_prot = PROT_READ | PROT_WRITE;
-#if !__OpenBSD__
+#if !(__OpenBSD__ || __APPLE__)
        mmap_prot |= PROT_EXEC;
 #endif
 #if __NetBSD__
@@ -2307,8 +2397,7 @@ _jit_emit(jit_state_t *_jit)
 #  endif
 #else
            _jit->code.ptr = mmap(NULL, length,
-                                 PROT_EXEC | PROT_READ | PROT_WRITE,
-                                 MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+                                 mmap_prot, mmap_flags, mmap_fd, 0);
 #endif
 
            assert(_jit->code.ptr != MAP_FAILED);
@@ -2340,12 +2429,12 @@ _jit_emit(jit_state_t *_jit)
        assert(result == 0);
     }
     if (!_jit->user_code) {
-       length = _jit->pc.uc - _jit->code.ptr;
+       _jit->code.protected = _jit->pc.uc - _jit->code.ptr;
 #  if __riscv && __WORDSIZE == 64
        /* FIXME should start adding consts at a page boundary */
-       length -= _jitc->consts.hash.count * sizeof(jit_word_t);
+       _jit->code.protected -= _jitc->consts.hash.count * sizeof(jit_word_t);
 #  endif
-       result = mprotect(_jit->code.ptr, length, PROT_READ | PROT_EXEC);
+       result = mprotect(_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC);
        assert(result == 0);
     }
 #endif /* HAVE_MMAP */
@@ -2355,6 +2444,32 @@ fail:
     return (NULL);
 }
 
+void /* Set the code buffer's protection to read+execute with mprotect(). */
+_jit_protect(jit_state_t *_jit)
+{
+#if !HAVE_MMAP /* build without mmap support: no mprotect() call, only a sanity check */
+  assert (_jit->user_code);
+#else
+  int result;
+  if (_jit->user_code) return; /* user_code set (presumably a caller-supplied buffer - confirm): protection is left alone */
+  result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC); /* code.protected is the length recorded by _jit_emit() */
+  assert (result == 0); /* failure is only caught by assert, so it goes unnoticed under NDEBUG */
+#endif
+}
+
+void /* Set the code buffer's protection to read+write with mprotect(). */
+_jit_unprotect(jit_state_t *_jit)
+{
+#if !HAVE_MMAP /* build without mmap support: no mprotect() call, only a sanity check */
+  assert (_jit->user_code);
+#else
+  int result;
+  if (_jit->user_code) return; /* user_code set (presumably a caller-supplied buffer - confirm): protection is left alone */
+  result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_WRITE); /* PROT_EXEC is not requested here; same length as _jit_protect() */
+  assert (result == 0); /* failure is only caught by assert, so it goes unnoticed under NDEBUG */
+#endif
+}
+
 void
 _jit_frame(jit_state_t *_jit, jit_int32_t frame)
 {
@@ -2786,6 +2901,9 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
                        jit_regset_ior(live, live, mask);
+                       /* Prevent registers explicitly set as live from
+                        * being used as a temporary for the jmpi. */
+                       jit_regset_ior(live, live, &_jitc->explive);
                        /* Treat anything else as dead. */
                        return;
                    }
@@ -2853,7 +2971,10 @@ _sequential_labels(jit_state_t *_jit)
                    if ((jump = node->link)) {
                        for (; jump; jump = link) {
                            link = jump->link;
-                           jump->u.n = prev;
+                           if (jump->code == jit_code_movi)
+                               jump->v.n = prev;
+                           else
+                               jump->u.n = prev;
                            jump->link = prev->link;
                            prev->link = jump;
                        }
@@ -2867,7 +2988,10 @@ _sequential_labels(jit_state_t *_jit)
                if ((jump = next->link)) {
                    for (; jump; jump = link) {
                        link = jump->link;
-                       jump->u.n = node;
+                       if (jump->code == jit_code_movi)
+                           jump->v.n = node;
+                       else
+                           jump->u.n = node;
                        jump->link = node->link;
                        node->link = jump;
                    }
@@ -3022,7 +3146,6 @@ _redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
                }
                break;
            case jit_code_name:         case jit_code_note:
-           case jit_code_align:
                break;
            default:
                return (0);
@@ -3073,7 +3196,7 @@ reverse_jump_code(jit_code_t code)
        case jit_code_bgti_f:   return (jit_code_bunlei_f);
 
        case jit_code_bner_f:   return (jit_code_beqr_f);
-       case jit_code_bnei_f:   return (jit_code_beqr_f);
+       case jit_code_bnei_f:   return (jit_code_beqi_f);
 
        case jit_code_bunltr_f: return (jit_code_bger_f);
        case jit_code_bunlti_f: return (jit_code_bgei_f);
@@ -3860,6 +3983,9 @@ static maybe_unused void
 generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
 #endif
 
+#define patch_alist(revert)            _patch_alist(_jit, revert) /* expects a variable named _jit in the caller's scope */
+static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert); /* the definition is compiled only when stack_framesize is defined */
+
 #if defined(__i386__) || defined(__x86_64__)
 #  include "jit_x86.c"
 #elif defined(__mips__)
@@ -3929,3 +4055,40 @@ generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(reg);
 }
 #endif
+
+#if defined(stack_framesize) /* the whole definition is skipped when stack_framesize is not defined */
+static maybe_unused void /* Shift the immediate offset of every node on the current function's alist by jit_diffsize(). */
+_patch_alist(jit_state_t *_jit, jit_bool_t revert) /* revert != 0 applies the opposite shift */
+{
+    jit_int32_t                 diff;
+    jit_node_t         *node;
+    diff = jit_diffsize();
+    if (diff) { /* a zero difference leaves every node untouched */
+       if (revert)
+           diff = -diff; /* negate so the subtractions below undo a previous call */
+       for (node = _jitc->function->alist; node; node = node->link) { /* alist is chained through node->link */
+           switch (node->code) {
+               case jit_code_ldxi_c:   case jit_code_ldxi_uc:
+               case jit_code_ldxi_s:   case jit_code_ldxi_us:
+               case jit_code_ldxi_i:
+#if __WORDSIZE == 64 /* these load codes are only handled on 64-bit word targets */
+               case jit_code_ldxi_ui:  case jit_code_ldxi_l:
+#endif
+               case jit_code_ldxi_f:   case jit_code_ldxi_d:
+                   node->w.w -= diff; /* loads: the adjusted operand is w.w */
+                   break;
+               case jit_code_stxi_c:   case jit_code_stxi_s:
+               case jit_code_stxi_i:
+#if __WORDSIZE == 64 /* this store code is only handled on 64-bit word targets */
+               case jit_code_stxi_l:
+#endif
+               case jit_code_stxi_f:   case jit_code_stxi_d:
+                   node->u.w -= diff; /* stores: the adjusted operand is u.w */
+                   break;
+               default:
+                   abort(); /* any other node code on the alist is treated as fatal */
+           }
+       }
+    }
+}
+#endif
index 1728fb2..1f31ed6 100644 (file)
@@ -68,14 +68,6 @@ main(int argc, char *argv[])
 #  else
     fprintf(fp, "#if !defined(__ARM_PCS_VFP)\n");
 #  endif
-#elif defined(__mips__)
-#  if __WORDSIZE == 32
-#    if NEW_ABI
-    fprintf(fp, "#if NEW_ABI\n");
-#    else
-    fprintf(fp, "#if !NEW_ABI\n");
-#    endif
-#  endif
 #elif defined(__powerpc__)
     fprintf(fp, "#if defined(__powerpc__)\n");
     fprintf(fp, "#if __BYTE_ORDER == %s\n",
@@ -94,10 +86,6 @@ main(int argc, char *argv[])
        fprintf(fp, "    %d,    /* %s */\n", _szs[offset], code_name[offset]);
 #if defined(__arm__)
     fprintf(fp, "#endif /* __ARM_PCS_VFP */\n");
-#elif defined(__mips__)
-#  if __WORDSIZE == 32
-    fprintf(fp, "#endif /* NEW_ABI */\n");
-#  endif
 #elif defined(__powerpc__)
 #  if __WORDSIZE == 32
     fprintf(fp, "#endif /* "