git subrepo pull (merge) --force deps/lightning
author Paul Cercueil <paul@crapouillou.net>
Fri, 24 Feb 2023 23:35:38 +0000 (23:35 +0000)
committer Paul Cercueil <paul@crapouillou.net>
Sun, 9 Jul 2023 11:55:56 +0000 (13:55 +0200)
subrepo:
  subdir:   "deps/lightning"
  merged:   "b1983e9036"
upstream:
  origin:   "https://github.com/pcercuei/gnu_lightning.git"
  branch:   "pcsx_rearmed"
  commit:   "b1983e9036"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "2f68596"

108 files changed:
deps/lightning/.gitignore
deps/lightning/.gitrepo
deps/lightning/ChangeLog
deps/lightning/Makefile.am
deps/lightning/THANKS
deps/lightning/TODO
deps/lightning/check/Makefile.am
deps/lightning/check/all.tst
deps/lightning/check/allocar.tst
deps/lightning/check/bit.ok [new file with mode: 0644]
deps/lightning/check/bit.tst [new file with mode: 0644]
deps/lightning/check/call.tst
deps/lightning/check/carg.c
deps/lightning/check/catomic.c
deps/lightning/check/ccall.c
deps/lightning/check/factorial.tst [new file with mode: 0644]
deps/lightning/check/fib.tst
deps/lightning/check/float.tst
deps/lightning/check/lightning.c
deps/lightning/check/protect.c [new file with mode: 0644]
deps/lightning/check/put.tst
deps/lightning/check/riprel.c [new file with mode: 0644]
deps/lightning/check/riprel.ok [new file with mode: 0644]
deps/lightning/check/setcode.c
deps/lightning/check/skip.ok [new file with mode: 0644]
deps/lightning/check/skip.tst [new file with mode: 0644]
deps/lightning/check/stack.tst
deps/lightning/configure.ac
deps/lightning/doc/Makefile.am
deps/lightning/doc/body.texi
deps/lightning/doc/rpn.c
deps/lightning/include/Makefile.am
deps/lightning/include/lightning.h.in
deps/lightning/include/lightning/jit_aarch64.h
deps/lightning/include/lightning/jit_alpha.h
deps/lightning/include/lightning/jit_arm.h
deps/lightning/include/lightning/jit_hppa.h
deps/lightning/include/lightning/jit_ia64.h
deps/lightning/include/lightning/jit_loongarch.h
deps/lightning/include/lightning/jit_mips.h
deps/lightning/include/lightning/jit_ppc.h
deps/lightning/include/lightning/jit_private.h
deps/lightning/include/lightning/jit_riscv.h
deps/lightning/include/lightning/jit_s390.h
deps/lightning/include/lightning/jit_sparc.h
deps/lightning/include/lightning/jit_x86.h
deps/lightning/lib/Makefile.am
deps/lightning/lib/aarch64-logical-immediates.c [new file with mode: 0644]
deps/lightning/lib/jit_aarch64-cpu.c
deps/lightning/lib/jit_aarch64-fpu.c
deps/lightning/lib/jit_aarch64-sz.c
deps/lightning/lib/jit_aarch64.c
deps/lightning/lib/jit_alpha-cpu.c
deps/lightning/lib/jit_alpha-fpu.c
deps/lightning/lib/jit_alpha-sz.c
deps/lightning/lib/jit_alpha.c
deps/lightning/lib/jit_arm-cpu.c
deps/lightning/lib/jit_arm-swf.c
deps/lightning/lib/jit_arm-sz.c
deps/lightning/lib/jit_arm-vfp.c
deps/lightning/lib/jit_arm.c
deps/lightning/lib/jit_disasm.c
deps/lightning/lib/jit_fallback.c
deps/lightning/lib/jit_hppa-cpu.c
deps/lightning/lib/jit_hppa-fpu.c
deps/lightning/lib/jit_hppa-sz.c
deps/lightning/lib/jit_hppa.c
deps/lightning/lib/jit_ia64-cpu.c
deps/lightning/lib/jit_ia64-fpu.c
deps/lightning/lib/jit_ia64-sz.c
deps/lightning/lib/jit_ia64.c
deps/lightning/lib/jit_loongarch-cpu.c
deps/lightning/lib/jit_loongarch-fpu.c
deps/lightning/lib/jit_loongarch-sz.c
deps/lightning/lib/jit_loongarch.c
deps/lightning/lib/jit_memory.c
deps/lightning/lib/jit_mips-cpu.c
deps/lightning/lib/jit_mips-fpu.c
deps/lightning/lib/jit_mips-sz.c
deps/lightning/lib/jit_mips.c
deps/lightning/lib/jit_names.c
deps/lightning/lib/jit_note.c
deps/lightning/lib/jit_ppc-cpu.c
deps/lightning/lib/jit_ppc-fpu.c
deps/lightning/lib/jit_ppc-sz.c
deps/lightning/lib/jit_ppc.c
deps/lightning/lib/jit_print.c
deps/lightning/lib/jit_rewind.c
deps/lightning/lib/jit_riscv-cpu.c
deps/lightning/lib/jit_riscv-fpu.c
deps/lightning/lib/jit_riscv-sz.c
deps/lightning/lib/jit_riscv.c
deps/lightning/lib/jit_s390-cpu.c
deps/lightning/lib/jit_s390-fpu.c
deps/lightning/lib/jit_s390-sz.c
deps/lightning/lib/jit_s390.c
deps/lightning/lib/jit_size.c
deps/lightning/lib/jit_sparc-cpu.c
deps/lightning/lib/jit_sparc-fpu.c
deps/lightning/lib/jit_sparc-sz.c
deps/lightning/lib/jit_sparc.c
deps/lightning/lib/jit_x86-cpu.c
deps/lightning/lib/jit_x86-sse.c
deps/lightning/lib/jit_x86-sz.c
deps/lightning/lib/jit_x86-x87.c
deps/lightning/lib/jit_x86.c
deps/lightning/lib/lightning.c
deps/lightning/size.c

index 6fc5bf9..bc7e971 100644 (file)
@@ -1,3 +1,4 @@
+/build-aux
 +*
 
 *.o
 +*
 
 *.o
index 6cc0878..17edd68 100644 (file)
@@ -6,7 +6,7 @@
 [subrepo]
        remote = https://github.com/pcercuei/gnu_lightning.git
        branch = pcsx_rearmed
 [subrepo]
        remote = https://github.com/pcercuei/gnu_lightning.git
        branch = pcsx_rearmed
-       commit = b910a469a9bea63056eb53430dea4c7b56e447a8
-       parent = 13b02197fcb7575646408094d5583ed7391b1153
+       commit = b1983e9036d35933ffa773d81b61eedbf3ae3b93
+       parent = 638335fabe3ba77b2a5c624a4c4aec52c18488f7
        method = merge
        cmdver = 0.4.3
        method = merge
        cmdver = 0.4.3
index 40ade7a..2cd5273 100644 (file)
@@ -1,3 +1,190 @@
+2023-02-23 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h: Add new 'inst' field to
+       jit_compiler_t, if __mips__ is defined. This field is a simple
+       helper for a pending instruction to be emitted, and that can
+       be emitted out of order.
+       * lib/jit_fallback.c: Update for changes in internal mips patching
+       and jumping macros and function calls.
+       * lib/jit_mips-cpu.c: Core of changes to attempt to fill delay
+       slots with instructions that can be emitted out of order.
+       * lib/jit_mips-fpu.c: Update to use delay slot in branches.
+       * lib/jit_mips.c: Update for new delay slot use logic.
+
+2023-02-20 Paulo Andrade <pcpa@gnu.org>
+
+       * check/float.tst: Add conditionals for mips release for expected
+       NaN truncated to an integer.
+       * check/lightning.c: Add extra preprocessor for mips release.
+       * include/lightning/jit_mips.h: Make the NEW_ABI preprocessor
+       defined to zero if using the n32 or n64 abis. This makes it
+       easier to create runtime checks with an always true or false
+       condition.
+       * lib/jit_mips-cpu.c, lib/jit_mips-fpu.c: Implement mips release
+       6 support.
+       * lib/jit_mips.c: Add more reliable mips release detection code.
+
+2023-02-09 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: Update for new bit.tst test, to check the
+       new clor, clzr, ctor and ctzr instructions.
+       * check/all.tst: Update to verify encoding of new instructions.
+       * check/lightning.c: Update to have the lightning "assembler"
+       understanding the new instructions.
+       * include/lightning.h.in: Define new codes for new instructions.
+       * lib/jit_aarch64.c, lib/jit_alpha.c, lib/jit_arm.c, lib/jit_hppa.c,
+       lib/jit_ia64.c, lib/jit_loongarch.c, lib/jit_mips.c, lib/jit_ppc.c,
+       lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc.c, lib/jit_x86.c:
+       Implement fallback version of new instructions.
+       * lib/jit_fallback.c: Actual implementation of the fallbacks of
+       the new instructions.
+       * lib/jit_names.c: Update to print debug information of new
+       instructions.
+
+2023-01-26 Paulo Andrade <pcpa@gnu.org>
+
+       * check/riprel.c, check/riprel.ok: New check files.
+       * check/Makefile.am: Support for new riprel test.
+       * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86.c: Implement
+       %rip relative addressing when reliable. Currently disabled for
+       x32 and _WIN32; could be added for positive relative addresses
+       only where it should work.
+       * lib/lightning.c: Correct problem added in previous patch due
+       to not testing on a 32 bit environment.
+
+2023-01-23 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c: Use pseudo instructions
+       "b" (BEQ(0,0,disp)) and "bal" (BGEZAL(0,disp)) for mips2, when an
+       unconditional branch or function call is known to be in range of a
+       relative jump. This should significantly reduce generated jit size.
+
+2023-01-20 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_mips-cpu.c, lib/jit_mips.c, lib/jit_rewind.c: Adapt
+       code to implement a variable framesize and optimize frame pointer
+       for simple leaf functions.
+
+2023-01-19 Paulo Andrade <pcpa@gnu.org>
+
+       * lib/jit_riscv.c, lib/jit_riscv-cpu.c: Adapt code to use a
+       variable framesize. Previously it was aligning the stack at
+       8 bytes, not 16. Now functions are called with a 16 byte aligned
+       stack.
+
+2023-01-18 Paulo Andrade <pcpa@gnu.org>
+
+       * include/lightning/jit_private.h: Include new framesize field
+       of jit_compiler_t; add new alist field for jit_function_t; add
+       new cvt_offset and need_stack fields specific to x86.
+       * lib/jit_x86.c, lib/jit_x86-cpu.c: Rewrite code to create stack
+       frames, so that less stack space can be used if no, or very few
+       callee save registers are modified in a function.
+       * lib/jit_x86-sse.c, lib/jit_x86-x87.c: Make CVT_OFFSET variable, and
+       dynamically allocated; this is required to avoid needing to
+       modify twice %rsp at function prologs, even if no stack space
+       is used.
+
+2022-11-09 Paulo Andrade <pcpa@gnu.org>
+
+       * configure.ac: Add new --enable-devel-strong-type-checking
+       option.
+       * include/lightning.h.in: Rework to not need to know if
+       PACKED_STACK is defined, and add a new argument to _jit_arg,
+       _jit_putarg{r,i}, _jit_pusharg{r,i} and _jit_ret{r,i} to have
+       the same code path if PACKED_STACK is defined or not, and also
+       to implement STRONG_TYPE_CHECK enabled with the new
+       --enable-devel-strong-type-checking.
+       * include/lightning/jit_private.h: Add new macros to add assertions
+       for STRONG_TYPE_CHECK and avoid pasting tokens in jit_inc_synth*
+       when the token is not a static known value.
+       * lib/jit_aarch64.c: The first implementation of the new code,
+       working correctly in Apple M1 and with and without STRONG_TYPE_CHECK
+       in Linux.
+
+2022-11-08 Paulo Andrade <pcpa@gnu.org>
+
+       Add support for packed stack arguments as used by Apple M1
+       aarch64 cpus. This requires a major redesign in how Lightning
+       works, because contrary to all other supported ports, in this
+       case arguments must be truncated and sign/zero extended if
+       passed in registers, but when receiving the argument, there
+       is no need to truncate and sign/zero extend.
+       Return values are also treated this way. The callee must
+       truncate sign/zero extend, not the caller.
+       * check/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS.
+       * check/all.tst: Implement paired arg/getarg/pusharg/putarg/ret
+       codes to validate they do not generate assertions.
+       * check/allocar.tst, check/call.tst, check/fib.tst, check/put.tst,
+       check/stack.tst: Update to pass in all build types.
+       * check/lightning.c: Add new codes for extra codes to handle
+       packed stack.
+       * configure.ac: Add a preprocessor define to know if a packed
+       stack is required. This is not really used, as it was moved to
+       jit_aarch64.h.
+       * doc/Makefile.am: Add LIGHTNING_CFLAGS to AM_CFLAGS.
+       * doc/rpn.c: Update to pass in all build types.
+       * include/lightning.h.in: Add new codes and reorder enum.
+       * include/lightning/jit_aarch64.h: Detect condition of needing
+       a packed stack.
+       * lib/jit_aarch64-sz.c: Regenerate.
+       * lib/jit_aarch64.c: Major updates for packed stack.
+       * lib/jit_names.c: Updates for debug output.
+       * lib/lightning.c: Update for new codes.
+
+2022-10-31  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       Add new skip instruction.
+       * .gitignore: Update from Gnulib.
+       * check/Makefile.am: Add tests.
+       * check/lightning.c: Handle skip instructions.
+       * check/protect.c: Rewrite with skip.
+       * check/skip.ok: New test.
+       * check/skip.tst: New test.
+       * doc/body.texi: Document the skip instruction.
+       * include/lightning.h.in: Add the skip instruction.
+       * lib/jit_aarch64-sz.c: Update for skip instruction.
+       * lib/jit_aarch64.c: Implement skip instruction.
+       * lib/jit_alpha-sz.c: Update for skip instruction.
+       * lib/jit_alpha.c: Implement skip instruction.
+       * lib/jit_arm-sz.c: Update for skip instruction.
+       * lib/jit_arm.c: Implement skip instruction.
+       * lib/jit_hppa-sz.c: Update for skip instruction.
+       * lib/jit_hppa.c: Implement skip instruction.
+       * lib/jit_ia64-sz.c: Update for skip instruction.
+       * lib/jit_ia64.c: Implement skip instruction.
+       * lib/jit_loongarch-sz.c: Update for skip instruction.
+       * lib/jit_loongarch.c: Implement skip instruction.
+       * lib/jit_mips-sz.c: Update for skip instruction.
+       * lib/jit_mips.c: Implement skip instruction.
+       * lib/jit_names.c: Update for skip instruction.
+       * lib/jit_ppc-sz.c: Update for skip instruction.
+       * lib/jit_ppc.c: Implement skip instruction.
+       * lib/jit_riscv-sz.c: Update for skip instruction.
+       * lib/jit_riscv.c: Implement skip instruction.
+       * lib/jit_s390-sz.c: Update for skip instruction.
+       * lib/jit_s390.c: Implement skip instruction.
+       * lib/jit_size.c: Treat align and skip in a special way.
+       * lib/jit_sparc-sz.c: Update for skip instruction.
+       * lib/jit_sparc.c: Implement skip instruction.
+       * lib/jit_x86-sz.c: Update for skip instruction.
+       * lib/jit_x86.c: Implement skip instruction.
+       * lib/lightning.c: Classify skip instruction.
+
+2022-10-30  Marc Nieper-Wißkirchen  <marc@nieper-wisskirchen.de>
+
+       Add user-visible functions jit_protect and jit_unprotect.
+       * check/Makefile.am: Add test for jit_protect and jit_unprotect.
+       * check/protect.c: New test.
+       * doc/body.texi: Add documentation for jit_protect and
+       jit_unprotect.
+       * include/lightning.h.in: Add prototypes for jit_protect and
+       jit_unprotect.
+       * include/lightning/jit_private.h: Add a field to store the size
+       of the protected memory.
+       * lib/lightning.c: Remember the size of the protected memory and
+       implement the two new functions.
+
 2022-10-12 Paulo Andrade <pcpa@gnu.org>
 
        * include/lightning/jit_loongarch.h, lib/jit_loongarch-cpu.c,
 2022-10-12 Paulo Andrade <pcpa@gnu.org>
 
        * include/lightning/jit_loongarch.h, lib/jit_loongarch-cpu.c,
index 112deae..8dbbaef 100644 (file)
@@ -1,5 +1,5 @@
 #
 #
-# Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
 #
 # This file is part of GNU lightning.
 #
index 0e0f1a9..d5737af 100644 (file)
@@ -19,3 +19,4 @@ Holger Hans Peter Freyther      <holger@moiji-mobile.com>
 Jon Arintok                     <jon.arintok@gmail.com>
 Bruno Haible                    <bruno@clisp.org>
 Marc Nieper-Wißkirchen                <marc@nieper-wisskirchen.de>
 Jon Arintok                     <jon.arintok@gmail.com>
 Bruno Haible                    <bruno@clisp.org>
 Marc Nieper-Wißkirchen                <marc@nieper-wisskirchen.de>
+Paul Cercueil                   <paul@crapouillou.net>
index 676af02..8b13789 100644 (file)
@@ -1,28 +1 @@
-       * Validate that divrem in jit_x86-cpu.c is not modifying
-       the non result arguments. This is not verified by clobber.tst,
-       as it only checks registers not involved in the operation
-       (because it does not know about values being set as input
-       for the the operation).
 
 
-       * Write a simple higher level language implementation generating
-       jit with lightning, that could be some lisp or C like language.
-
-       * rerun ./configure --enable-devel-get-jit-size and regenerate
-       the related jit_$arch-sz.c for the ports where nodata is
-       meaningful:
-       hppa            (done)
-       i586            (done)
-       ia64
-       mips o32        (done)
-       mips n32
-       mips n64
-       powerpc 32      (done)
-       powerpc 64      (done)
-       ppc
-       s390x           (done)
-       sparc           (done)
-       x86_64          (done)
-       Missing ones are due to no longer (remote) access to such hosts
-       and may be broken with jit_set_data(..., JIT_DISABLE_DATA).
-       (ia64 hp-ux or linx), (irix mips for 32 or 64 abi), and
-       (darwin ppc).
index 10537b1..c77f5cd 100644 (file)
@@ -1,5 +1,5 @@
 #
 #
-# Copyright 2012-2022 Free Software Foundation, Inc.
+# Copyright 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
 #
 # This file is part of GNU lightning.
 #
 # License for more details.
 #
 
 # License for more details.
 #
 
-AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
+       -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 
 check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
 
 check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
-       catomic
+       catomic protect riprel
 
 lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 lightning_SOURCES = lightning.c
 
 lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 lightning_SOURCES = lightning.c
@@ -46,6 +47,12 @@ cva_list_SOURCES = cva_list.c
 catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB)
 catomic_SOURCES = catomic.c
 
 catomic_LDADD = $(top_builddir)/lib/liblightning.la -lm -lpthread $(SHLIB)
 catomic_SOURCES = catomic.c
 
+protect_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+protect_SOURCES = protect.c
+
+riprel_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+riprel_SOURCES = riprel.c
+
 $(top_builddir)/lib/liblightning.la:
        cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
 
 $(top_builddir)/lib/liblightning.la:
        cd $(top_builddir)/lib; $(MAKE) $(AM_MAKEFLAGS) liblightning.la
 
@@ -105,8 +112,10 @@ EXTRA_DIST =                               \
        range.tst       range.ok        \
        ranger.tst      ranger.ok       \
        ret.tst         ret.ok          \
        range.tst       range.ok        \
        ranger.tst      ranger.ok       \
        ret.tst         ret.ok          \
+       skip.tst        skip.ok         \
        tramp.tst       tramp.ok        \
        va_list.tst     va_list.ok      \
        tramp.tst       tramp.ok        \
        va_list.tst     va_list.ok      \
+       bit.tst         bit.ok          \
        check.sh                        \
        check.x87.sh                    \
        check.arm.sh    check.swf.sh    \
        check.sh                        \
        check.x87.sh                    \
        check.arm.sh    check.swf.sh    \
@@ -114,7 +123,8 @@ EXTRA_DIST =                                \
        check.arm4.swf.sh               \
        check.nodata.sh                 \
        check.x87.nodata.sh             \
        check.arm4.swf.sh               \
        check.nodata.sh                 \
        check.x87.nodata.sh             \
-       run-test        all.tst
+       run-test        all.tst         \
+       collatz.tst     factorial.tst
 
 base_TESTS =                           \
        3to2 add align allocai          \
 
 base_TESTS =                           \
        3to2 add align allocai          \
@@ -135,8 +145,8 @@ base_TESTS =                                \
        clobber carry call              \
        float jmpr live put             \
        qalu_mul qalu_div               \
        clobber carry call              \
        float jmpr live put             \
        qalu_mul qalu_div               \
-       range ranger ret tramp          \
-       va_list
+       range ranger ret skip tramp     \
+       va_list bit
 
 $(base_TESTS): check.sh
        $(LN_S) $(srcdir)/check.sh $@
 
 $(base_TESTS): check.sh
        $(LN_S) $(srcdir)/check.sh $@
@@ -317,13 +327,14 @@ nodata_TESTS =                                            \
        clobber.nodata carry.nodata call.nodata         \
        float.nodata jmpr.nodata tramp.nodata           \
        range.nodata ranger.nodata put.nodata           \
        clobber.nodata carry.nodata call.nodata         \
        float.nodata jmpr.nodata tramp.nodata           \
        range.nodata ranger.nodata put.nodata           \
-       va_list.nodata
+       va_list.nodata bit.nodata
 $(nodata_TESTS):       check.nodata.sh
        $(LN_S) $(srcdir)/check.nodata.sh $@
 TESTS += $(nodata_TESTS)
 endif
 
 $(nodata_TESTS):       check.nodata.sh
        $(LN_S) $(srcdir)/check.nodata.sh $@
 TESTS += $(nodata_TESTS)
 endif
 
-TESTS += ccall self setcode nodata ctramp carg cva_list catomic
+TESTS += ccall self setcode nodata ctramp carg cva_list catomic \
+         protect riprel
 CLEANFILES = $(TESTS)
 
 #TESTS_ENVIRONMENT=$(srcdir)/run-test;
 CLEANFILES = $(TESTS)
 
 #TESTS_ENVIRONMENT=$(srcdir)/run-test;
index ac4fc97..d24f7ae 100644 (file)
@@ -2,15 +2,16 @@
 .code
        prolog
        allocai 32 $buf
 .code
        prolog
        allocai 32 $buf
-       arg $c
-       arg $uc
-       arg $s
-       arg $us
-       arg $i
+       arg_c $c
+       arg_c $uc
+       arg_s $s
+       arg_s $us
+       arg_i $i
+       arg_i $ui
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       arg $ui
-       arg $l
+       arg_l $l
 #endif
 #endif
+       arg $a
        getarg_c %r0 $c
        getarg_uc %r0 $uc
        getarg_s %r0 $s
        getarg_c %r0 $c
        getarg_uc %r0 $uc
        getarg_s %r0 $s
        getarg_ui %r0 $ui
        getarg_l %r0 $l
 #endif
        getarg_ui %r0 $ui
        getarg_l %r0 $l
 #endif
+       getarg %r0 $a
+       putargr_c %r0 $c
+       putargi_c 1 $c
+       putargr_uc %r0 $uc
+       putargi_uc 1 $uc
+       putargr_s %r0 $s
+       putargi_s 1 $s
+       putargr_us %r0 $us
+       putargi_us 1 $us
+       putargr_i %r0 $i
+       putargi_i 1 $ui
+#if __WORDSIZE == 64
+       putargr_ui %r0 $ui
+       putargi_ui 1 $ui
+       putargr_l %r0 $l
+       putargi_l 1 $l
+#endif
+       putargr %r0 $a
+       putargi 1 $a
        addr %r0 %r1 %r2
        addi %r0 %r1 2
        addcr %r0 %r1 %r2
        addr %r0 %r1 %r2
        addi %r0 %r1 2
        addcr %r0 %r1 %r2
        rshi_u %r0 %r1 2
        negr %r0 %r1
        comr %r0 %r1
        rshi_u %r0 %r1 2
        negr %r0 %r1
        comr %r0 %r1
+       clor %r0 %r1
+       clzr %r0 %r1
+       ctor %r0 %r1
+       ctzr %r0 %r1
        ltr %r0 %r1 %r2
        lti %r0 %r1 2
        ltr_u %r0 %r1 %r2
        ltr %r0 %r1 %r2
        lti %r0 %r1 2
        ltr_u %r0 %r1 %r2
@@ -205,6 +229,15 @@ label:
        callr %r0
        calli label
        prepare
        callr %r0
        calli label
        prepare
+       pushargr_c %r0
+       pushargr_uc %r0
+       pushargr_s %r0
+       pushargr_us %r0
+       pushargr_i %r0
+#if __WORDSIZE == 64
+       pushargr_ui %r0
+       pushargr_l %r0
+#endif
        pushargr %r0
        finishr %r0
        prepare
        pushargr %r0
        finishr %r0
        prepare
@@ -212,6 +245,15 @@ label:
        ellipsis
        finishi 0x80000000
        ret
        ellipsis
        finishi 0x80000000
        ret
+       retr_c %r1
+       retr_uc %r1
+       retr_s %r1
+       retr_us %r1
+       retr_i %r1
+#if __WORDSIZE == 64
+       retr_ui %r1
+       retr_l %r1
+#endif
        retr %r1
        reti 2
        retval_c %r1
        retr %r1
        reti 2
        retval_c %r1
@@ -225,6 +267,8 @@ label:
 #endif
        arg_f $f
        getarg_f %f1 $f
 #endif
        arg_f $f
        getarg_f %f1 $f
+       putargr_f %f1 $f
+       putargi_f 1.0 $f
        addr_f %f0 %f1 %f2
        addi_f %f0 %f1 0.5
        subr_f %f0 %f1 %f2
        addr_f %f0 %f1 %f2
        addi_f %f0 %f1 0.5
        subr_f %f0 %f1 %f2
@@ -323,6 +367,8 @@ unordi:
        retval_f %f1
        arg_d $f
        getarg_d %f1 $f
        retval_f %f1
        arg_d $f
        getarg_d %f1 $f
+       putargr_d %f1 $f
+       putargi_d 1.0 $f
        addr_d %f0 %f1 %f2
        addi_d %f0 %f1 0.5
        subr_d %f0 %f1 %f2
        addr_d %f0 %f1 %f2
        addi_d %f0 %f1 0.5
        subr_d %f0 %f1 %f2
index e3ee010..1bffef8 100644 (file)
@@ -55,7 +55,7 @@ fill##T##done:                                                        \
 #define fill_us                fill_s
 #define fill_ui                fill_i
 
 #define fill_us                fill_s
 #define fill_ui                fill_i
 
-#define ARG(  T, N)                    arg    $arg##T##N
+#define ARG(  T, N)                    arg##T $arg##T##N
 #define ARGF( T, N)                    arg##T $arg##T##N
 #define ARG1( K, T)                    ARG##K(T, 0)
 #define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
 #define ARGF( T, N)                    arg##T $arg##T##N
 #define ARG1( K, T)                    ARG##K(T, 0)
 #define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
@@ -74,56 +74,56 @@ fill##T##done:                                                      \
 #define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
 #define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
 #define ARG_c(N)                       ARG##N( , _c)
 #define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
 #define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
 #define ARG_c(N)                       ARG##N( , _c)
-#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_uc(N)                      ARG##N( , _c)
 #define ARG_s(N)                       ARG##N( , _s)
 #define ARG_s(N)                       ARG##N( , _s)
-#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_us(N)                      ARG##N( , _s)
 #define ARG_i(N)                       ARG##N( , _i)
 #define ARG_i(N)                       ARG##N( , _i)
-#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_ui(N)                      ARG##N( , _i)
 #define ARG_l(N)                       ARG##N( , _l)
 #define ARG_f(N)                       ARG##N(F, _f)
 #define ARG_d(N)                       ARG##N(F, _d)
 
 #define ARG_l(N)                       ARG##N( , _l)
 #define ARG_f(N)                       ARG##N(F, _f)
 #define ARG_d(N)                       ARG##N(F, _d)
 
-#define CHK(N, T, V)                                           \
-       getarg %r0 $arg##T##V                                   \
+#define CHK(N, T, TT, V)                                       \
+       getarg##T %r0 $arg##TT##V                               \
        ldxi##T %r1 %v0 $(V * szof##T)                          \
        beqr N##T##V %r0 %r1                                    \
        calli @abort                                            \
 N##T##V:
        ldxi##T %r1 %v0 $(V * szof##T)                          \
        beqr N##T##V %r0 %r1                                    \
        calli @abort                                            \
 N##T##V:
-#define CHKF(N, T, V)                                          \
-       getarg##T %f0 $arg##T##V                                \
+#define CHKF(N, T, TT, V)                                      \
+       getarg##T %f0 $arg##TT##V                               \
        ldxi##T %f1 %v0 $(V * szof##T)                          \
        beqr##T N##T##V %f0 %f1                                 \
        calli @abort                                            \
 N##T##V:
 
        ldxi##T %f1 %v0 $(V * szof##T)                          \
        beqr##T N##T##V %f0 %f1                                 \
        calli @abort                                            \
 N##T##V:
 
-#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
-#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
-#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
-#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
-#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
-#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
-#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
-#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
-#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
-#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
-#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
-#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
-#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
-#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
-#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
-#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+#define GET1( K, N, T, TT, V)                          CHK##K(N, T, TT, 0)
+#define GET2( K, N, T, TT, V)  GET1( K, N, T, TT, V)   CHK##K(N, T, TT, 1)
+#define GET3( K, N, T, TT, V)  GET2( K, N, T, TT, V)   CHK##K(N, T, TT, 2)
+#define GET4( K, N, T, TT, V)  GET3( K, N, T, TT, V)   CHK##K(N, T, TT, 3)
+#define GET5( K, N, T, TT, V)  GET4( K, N, T, TT, V)   CHK##K(N, T, TT, 4)
+#define GET6( K, N, T, TT, V)  GET5( K, N, T, TT, V)   CHK##K(N, T, TT, 5)
+#define GET7( K, N, T, TT, V)  GET6( K, N, T, TT, V)   CHK##K(N, T, TT, 6)
+#define GET8( K, N, T, TT, V)  GET7( K, N, T, TT, V)   CHK##K(N, T, TT, 7)
+#define GET9( K, N, T, TT, V)  GET8( K, N, T, TT, V)   CHK##K(N, T, TT, 8)
+#define GET10(K, N, T, TT, V)  GET9( K, N, T, TT, V)   CHK##K(N, T, TT, 9)
+#define GET11(K, N, T, TT, V)  GET10(K, N, T, TT, V)   CHK##K(N, T, TT, 10)
+#define GET12(K, N, T, TT, V)  GET11(K, N, T, TT, V)   CHK##K(N, T, TT, 11)
+#define GET13(K, N, T, TT, V)  GET12(K, N, T, TT, V)   CHK##K(N, T, TT, 12)
+#define GET14(K, N, T, TT, V)  GET13(K, N, T, TT, V)   CHK##K(N, T, TT, 13)
+#define GET15(K, N, T, TT, V)  GET14(K, N, T, TT, V)   CHK##K(N, T, TT, 14)
+#define GET16(K, N, T, TT, V)  GET15(K, N, T, TT, V)   CHK##K(N, T, TT, 15)
 
 
-#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
-#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
-#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
-#define GET_us(N, M)           GET##N( , us##N, _us, M)
-#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
-#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
-#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
-#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
-#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+#define GET_c(N, M)            GET##N( , c##N,  _c,  _c, M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, _c, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  _s, M)
+#define GET_us(N, M)           GET##N( , us##N, _us, _s, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  _i, M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, _i, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  _l, M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  _f, M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  _d, M)
 
 
-#define PUSH(  T, V)           pushargi    V
+#define PUSH(  T, V)           pushargi##T V
 #define PUSHF( T, V)           pushargi##T V
 #define PUSH0( K, T)           /**/
 #define PUSH1( K, T)                                   PUSH##K(T, 0)
 #define PUSHF( T, V)           pushargi##T V
 #define PUSH0( K, T)           /**/
 #define PUSH1( K, T)                                   PUSH##K(T, 0)
@@ -161,14 +161,14 @@ test##T##_0:                                                      \
        ret                                                     \
        epilog
 
        ret                                                     \
        epilog
 
-#define DEFN(N, M, T)                                          \
+#define DEFN(N, M, T, TT)                                      \
        name test##T##_##N                                      \
 test##T##_##N:                                                 \
        prolog                                                  \
        arg $argp                                               \
        /* stack buffer in %v0 */                               \
        getarg %v0 $argp                                        \
        name test##T##_##N                                      \
 test##T##_##N:                                                 \
        prolog                                                  \
        arg $argp                                               \
        /* stack buffer in %v0 */                               \
        getarg %v0 $argp                                        \
-       ARG##T(N)                                               \
+       ARG##TT(N)                                              \
        /* validate arguments */                                \
        GET##T(N, M)                                            \
        /* heap buffer in %v1 */                                \
        /* validate arguments */                                \
        GET##T(N, M)                                            \
        /* heap buffer in %v1 */                                \
@@ -260,24 +260,24 @@ test##T##_17_done:                                                \
        ret                                                     \
        epilog
 
        ret                                                     \
        epilog
 
-#define DEF(  T)                                               \
+#define DEF(  T, TT)                                           \
        DEF0( T)                                                \
        DEF0( T)                                                \
-       DEFN( 1,  0, T)                                         \
-       DEFN( 2,  1, T)                                         \
-       DEFN( 3,  2, T)                                         \
-       DEFN( 4,  3, T)                                         \
-       DEFN( 5,  4, T)                                         \
-       DEFN( 6,  5, T)                                         \
-       DEFN( 7,  6, T)                                         \
-       DEFN( 8,  7, T)                                         \
-       DEFN( 9,  8, T)                                         \
-       DEFN(10,  9, T)                                         \
-       DEFN(11, 10, T)                                         \
-       DEFN(12, 11, T)                                         \
-       DEFN(13, 12, T)                                         \
-       DEFN(14, 13, T)                                         \
-       DEFN(15, 14, T)                                         \
-       DEFN(16, 15, T)                                         \
+       DEFN( 1,  0, T, TT)                                     \
+       DEFN( 2,  1, T, TT)                                     \
+       DEFN( 3,  2, T, TT)                                     \
+       DEFN( 4,  3, T, TT)                                     \
+       DEFN( 5,  4, T, TT)                                     \
+       DEFN( 6,  5, T, TT)                                     \
+       DEFN( 7,  6, T, TT)                                     \
+       DEFN( 8,  7, T, TT)                                     \
+       DEFN( 9,  8, T, TT)                                     \
+       DEFN(10,  9, T, TT)                                     \
+       DEFN(11, 10, T, TT)                                     \
+       DEFN(12, 11, T, TT)                                     \
+       DEFN(13, 12, T, TT)                                     \
+       DEFN(14, 13, T, TT)                                     \
+       DEFN(15, 14, T, TT)                                     \
+       DEFN(16, 15, T, TT)                                     \
        DEFX(T)
 
 #define CALL(T)                        calli test##T##_17
        DEFX(T)
 
 #define CALL(T)                        calli test##T##_17
@@ -323,17 +323,17 @@ memcpy_done:
        FILLF(_f)
        FILLF(_d)
 
        FILLF(_f)
        FILLF(_d)
 
-       DEF(_c)
-       DEF(_uc)
-       DEF(_s)
-       DEF(_us)
-       DEF(_i)
+       DEF(_c, _c)
+       DEF(_uc, _c)
+       DEF(_s, _s)
+       DEF(_us, _s)
+       DEF(_i, _i)
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       DEF(_ui)
-       DEF(_l)
+       DEF(_ui, _i)
+       DEF(_l, _l)
 #endif
 #endif
-       DEF(_f)
-       DEF(_d)
+       DEF(_f, _f)
+       DEF(_d, _d)
 
        name main
 main:
 
        name main
 main:
diff --git a/deps/lightning/check/bit.ok b/deps/lightning/check/bit.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/bit.tst b/deps/lightning/check/bit.tst
new file mode 100644 (file)
index 0000000..b721d5c
--- /dev/null
@@ -0,0 +1,881 @@
+/* If the fallback implementations of clor, clzr, ctor and ctzr are used,
+ * it might be better to implement them as functions, because the inlined
+ * fallback is almost as large as a function.
+ * Below is an example of how to do that.
+ */
+
+/* String constants.  str_clo, str_clz, str_cto and str_ctz are the
+ * operation names handed to printf by the debug version of main;
+ * print_fmt is its format string, whose hexadecimal field width follows
+ * __WORDSIZE; ok is what the table driven version of main prints after
+ * all checks have passed. */
+.data  4096
+str_clo:
+.c     "clo"
+str_clz:
+.c     "clz"
+str_cto:
+.c     "cto"
+str_ctz:
+.c     "ctz"
+print_fmt:
+#if __WORDSIZE == 64
+.c     "%s (0x%016lx) %s = %d\n"
+#else
+.c     "%s (0x%08lx) %s = %d\n"
+#endif
+ok:
+.c     "ok\n"
+
+/* Load VALUE into register SRC, apply INSN##r from SRC into DST and call
+ * abort() unless DST then equals EXPECT.  The skip label is pasted from
+ * INSN, DST, SRC and VALUE, so each expansion needs a distinct
+ * combination of those four. */
+#define BIT2(INSN, VALUE, EXPECT, DST, SRC)            \
+       movi %SRC VALUE                                 \
+       INSN##r %DST %SRC                               \
+       beqi INSN##DST##SRC##VALUE %DST EXPECT          \
+       calli @abort                                    \
+INSN##DST##SRC##VALUE:
+
+/* Check OP with DST as the destination register and, in turn, DST itself
+ * and each of S1..S5 as the source register. */
+#define BIT1(OP, ARG, RES, DST, S1, S2, S3, S4, S5)    \
+       BIT2(OP, ARG, RES, DST, DST)                    \
+       BIT2(OP, ARG, RES, DST, S1)                     \
+       BIT2(OP, ARG, RES, DST, S2)                     \
+       BIT2(OP, ARG, RES, DST, S3)                     \
+       BIT2(OP, ARG, RES, DST, S4)                     \
+       BIT2(OP, ARG, RES, DST, S5)
+
+/* Expand BIT1 once for every rotation of the six registers, so that each
+ * register is used as the destination (BIT1 always writes to the first
+ * register it is given) with every register as the source.
+ * The unrotated order comes first; without it the register passed as V0
+ * would never be exercised as a destination. */
+#define  BIT(OP, ARG, RES, V0, V1, V2, R0, R1, R2)     \
+       BIT1(OP, ARG, RES, V0, V1, V2, R0, R1, R2)      \
+       BIT1(OP, ARG, RES, V1, V2, R0, R1, R2, V0)      \
+       BIT1(OP, ARG, RES, V2, R0, R1, R2, V0, V1)      \
+       BIT1(OP, ARG, RES, R0, R1, R2, V0, V1, V2)      \
+       BIT1(OP, ARG, RES, R1, R2, V0, V1, V2, R0)      \
+       BIT1(OP, ARG, RES, R2, V0, V1, V2, R0, R1)
+
+/* Front ends used by the test table: check that the clo, clz, cto or ctz
+ * instruction turns VALUE into EXPECT, over registers v0-v2 and r0-r2. */
+#define  CLO(VALUE, EXPECT)                            \
+        BIT(clo, VALUE, EXPECT, v0, v1, v2, r0, r1, r2)
+#define  CLZ(VALUE, EXPECT)                            \
+        BIT(clz, VALUE, EXPECT, v0, v1, v2, r0, r1, r2)
+#define  CTO(VALUE, EXPECT)                            \
+        BIT(cto, VALUE, EXPECT, v0, v1, v2, r0, r1, r2)
+#define  CTZ(VALUE, EXPECT)                            \
+        BIT(ctz, VALUE, EXPECT, v0, v1, v2, r0, r1, r2)
+
+/* Start of the code section: jump over the helper functions defined
+ * below, straight to main. */
+.code
+       jmpi main
+/*
+ * cto: count the trailing one bits of a word.
+ * The argument is complemented; if nothing is left (the argument was all
+ * ones) the result is __WORDSIZE, otherwise it is ctz() of the complement.
+ *
+ *     jit_uword_t cto(jit_uword_t r0) {
+ *             r0 = ~r0;
+ *             return r0 == 0 ? __WORDSIZE : ctz(r0);
+ *     }
+ */
+name cto
+cto:
+       prolog
+       arg $in
+       getarg %r0 $in
+       comr %r0 %r0
+       beqi cto_all_ones %r0 0
+       prepare
+               pushargr %r0
+       finishi ctz
+       retval %r0
+       retr %r0
+cto_all_ones:
+       reti __WORDSIZE
+       epilog
+
+/*
+ * clo: count the leading one bits of a word.
+ * The argument is complemented; if nothing is left (the argument was all
+ * ones) the result is __WORDSIZE, otherwise it is clz() of the complement.
+ *
+ *     jit_uword_t clo(jit_uword_t r0) {
+ *             r0 = ~r0;
+ *             return r0 == 0 ? __WORDSIZE : clz(r0);
+ *     }
+ */
+name clo
+clo:
+       prolog
+       arg $in
+       getarg %r0 $in
+       comr %r0 %r0
+       beqi clo_all_ones %r0 0
+       prepare
+               pushargr %r0
+       finishi clz
+       retval %r0
+       retr %r0
+clo_all_ones:
+       reti __WORDSIZE
+       epilog
+
+/*
+ * clz: count the leading zero bits of a word; a zero argument yields
+ * __WORDSIZE.  The mask r2 covers the upper half of the bits still to be
+ * examined; whenever that half is empty the value is shifted left by the
+ * half width and the count is increased by the same amount, then the mask
+ * is narrowed to the upper half of what remains.
+ *
+       jit_uword_t clz(jit_uword_t r1) {
+               jit_uword_t     r0, r2;
+               if (r1 == 0)
+                       r0 = __WORDSIZE;
+               else {
+                       r0 = 0;
+       #if __WORDSIZE == 64
+                       r2 = 0xffffffff00000000UL;
+                       if (!(r1 & r2)) {
+                               r1 <<= 32;
+                               r0 += 32;
+                       }
+                       r2 <<= 16;
+       #else
+                       r2 = 0xffff0000UL;
+       #endif
+                       if (!(r1 & r2)) {
+                               r1 <<= 16;
+                               r0 += 16;
+                       }
+                       r2 <<= 8;
+                       if (!(r1 & r2)) {
+                               r1 <<= 8;
+                               r0 += 8;
+                       }
+                       r2 <<= 4;
+                       if (!(r1 & r2)) {
+                               r1 <<= 4;
+                               r0 += 4;
+                       }
+                       r2 <<= 2;
+                       if (!(r1 & r2)) {
+                               r1 <<= 2;
+                               r0 += 2;
+                       }
+                       r2 <<= 1;
+                       if (!(r1 & r2))
+                               r0 += 1;
+               }
+               return r0;
+       }
+ */
+name clz
+clz:
+       prolog
+       arg $in
+       getarg %r1 $in
+       /* a zero argument has no set bit: every bit is a leading zero */
+       bnei lun %r1 0
+       reti __WORDSIZE
+lun:
+       /* r0 accumulates the count */
+       movi %r0 0
+#if __WORDSIZE == 64
+       /* upper 32 bits empty: move the lower half up and count 32 */
+       movi %r2 0xffffffff00000000
+       bmsr l32 %r1 %r2
+       lshi %r1 %r1 32
+       addi %r0 %r0 32
+l32:
+       /* narrow the mask to the top 16 bits */
+       lshi %r2 %r2 16
+#else
+       movi %r2 0xffff0000
+#endif
+       bmsr l16 %r1 %r2
+       lshi %r1 %r1 16
+       addi %r0 %r0 16
+l16:
+       /* top 8 bits */
+       lshi %r2 %r2 8
+       bmsr  l8 %r1 %r2
+       lshi %r1 %r1 8
+       addi %r0 %r0 8
+l8:
+       /* top 4 bits */
+       lshi %r2 %r2 4
+       bmsr  l4 %r1 %r2
+       lshi %r1 %r1 4
+       addi %r0 %r0 4
+l4:
+       /* top 2 bits */
+       lshi %r2 %r2 2
+       bmsr  l2 %r1 %r2
+       lshi %r1 %r1 2
+       addi %r0 %r0 2
+l2:
+       /* top bit; the value is not needed afterwards, so it is not shifted */
+       lshi %r2 %r2 1
+       bmsr  l1 %r1 %r2
+       addi %r0 %r0 1
+l1:
+       retr %r0
+       epilog
+
+/*
+ * ctz: count the trailing zero bits of a word; a zero argument yields
+ * __WORDSIZE.  The mask r2 covers the lower half of the bits still to be
+ * examined; whenever that half is empty the value is shifted right by the
+ * half width and the count is increased by the same amount, then the mask
+ * is narrowed to the lower half of what remains.
+ *
+       jit_uword_t ctz(jit_uword_t r1) {
+               jit_uword_t     r0, r2;
+               if (r1 == 0)
+                       r0 = __WORDSIZE;
+               else {
+                       r0 = 0;
+       #if __WORDSIZE == 64
+                       r2 = 0xffffffffUL;
+                       if (!(r1 & r2)) {
+                               r1 >>= 32;
+                               r0 += 32;
+                       }
+                       r2 >>= 16;
+       #else
+                       r2 = 0xffffUL;
+       #endif
+                       if (!(r1 & r2)) {
+                               r1 >>= 16;
+                               r0 += 16;
+                       }
+                       r2 >>= 8;
+                       if (!(r1 & r2)) {
+                               r1 >>= 8;
+                               r0 += 8;
+                       }
+                       r2 >>= 4;
+                       if (!(r1 & r2)) {
+                               r1 >>= 4;
+                               r0 += 4;
+                       }
+                       r2 >>= 2;
+                       if (!(r1 & r2)) {
+                               r1 >>= 2;
+                               r0 += 2;
+                       }
+                       r2 >>= 1;
+                       if (!(r1 & r2))
+                               r0 += 1;
+               }
+               return r0;
+       }
+*/
+name   ctz
+ctz:
+       prolog
+       arg $in
+       getarg %r1 $in
+       /* a zero argument has no set bit: every bit is a trailing zero */
+       bnei tun %r1 0
+       reti __WORDSIZE
+tun:
+       /* r0 accumulates the count; it must start at zero for both word
+        * sizes, so the initialization stays outside the #if below */
+       movi %r0 0
+#if __WORDSIZE == 64
+       /* lower 32 bits empty: move the upper half down and count 32 */
+       movi %r2 0xffffffff
+       bmsr t32 %r1 %r2
+       rshi_u %r1 %r1 32
+       addi %r0 %r0 32
+t32:
+       /* narrow the mask to the low 16 bits */
+       rshi %r2 %r2 16
+#else
+       movi %r2 0xffff
+#endif
+       bmsr t16 %r1 %r2
+       rshi_u %r1 %r1 16
+       addi %r0 %r0 16
+t16:
+       /* low 8 bits */
+       rshi %r2 %r2 8
+       bmsr  t8 %r1 %r2
+       rshi_u %r1 %r1 8
+       addi %r0 %r0 8
+t8:
+       /* low 4 bits */
+       rshi %r2 %r2 4
+       bmsr  t4 %r1 %r2
+       rshi_u %r1 %r1 4
+       addi %r0 %r0 4
+t4:
+       /* low 2 bits */
+       rshi %r2 %r2 2
+       bmsr  t2 %r1 %r2
+       rshi_u %r1 %r1 2
+       addi %r0 %r0 2
+t2:
+       /* lowest bit; the value is not needed afterwards, so it is not shifted */
+       rshi %r2 %r2 1
+       bmsr  t1 %r1 %r2
+       addi %r0 %r0 1
+t1:
+       retr %r0
+       epilog
+
+/*
+ * bitsprint: write the __WORDSIZE binary digits of a word, most
+ * significant bit first, into a buffer as a NUL terminated string and
+ * return the buffer.  The buffer is first filled with '0'; then a one bit
+ * mask walks from the top bit down and a '1' is stored for every bit that
+ * is set in the value.
+ *
+ *     char *bitsprint(char *v0, jit_uword_t v1) {
+ *             jit_uword_t r0, r1;
+ *             memset(v0, '0', __WORDSIZE);
+ *             v0[__WORDSIZE] = 0;
+ *             for (r0 = 1L << (__WORDSIZE - 1), r1 = 0; r0; r0 >>= 1, ++r1) {
+ *                     if (v1 & r0)
+ *                             v0[r1] = '1';
+ *             }
+ *             return v0;
+ *     }
+ */
+name bitsprint
+bitsprint:
+       prolog
+       arg $buf
+       arg $val
+       getarg %v0 $buf
+       getarg %v1 $val
+       prepare
+               pushargr %v0
+               pushargi '0'
+               pushargi __WORDSIZE
+       finishi @memset
+       /* terminate the string right after the last digit */
+       movi %r2 0
+       stxi_c __WORDSIZE %v0 %r2
+       /* r2 = digit to store, r1 = digit index, r0 = bit mask */
+       movi %r2 '1'
+       movi %r1 0
+       movi %r0 $(1 << (__WORDSIZE - 1))
+bits_next:
+       bmcr bits_skip %v1 %r0
+       stxr_c %r1 %v0 %r2
+bits_skip:
+       rshi_u %r0 %r0 1
+       addi %r1 %r1 1
+       bnei bits_next %r0 0
+       retr %v0
+       epilog
+
+/*
+       #if 0
+       int main(int argc, char *argv[]) {
+               jit_uword_t      r0, v0, v1, v2;
+               char             buf[80];
+       #if __WORDSIZE == 64
+               char            *fmt = "%s (0x%016lx) %s = %d\n";
+               v0 = 0x8000000000000000UL;
+               v2 = 0xffffffffffffffffUL;
+       #else
+               char            *fmt = "%s (0x%08lx) %s = %d\n";
+               v0 = 0x80000000UL;
+               v2 = 0xffffffffUL;
+       #endif
+               do {
+                       v1 = v0 - 1;
+                       r0 = clz(v0);
+                       bitsprint(buf, v0);
+                       printf(fmt, "clz", v0, buf, r0);
+                       r0 = clo(v2);
+                       bitsprint(buf, v2);
+                       printf(fmt, "clo", v2, buf, r0);
+                       r0 = ctz(v0);
+                       bitsprint(buf, v0);
+                       printf(fmt, "ctz", v0, buf, r0);
+                       r0 = cto(v1);
+                       bitsprint(buf, v1);
+                       printf(fmt, "cto", v1, buf, r0);
+                       v0 >>= 1;
+                       v2 <<= 1;
+               } while ((jit_word_t)v1 > -1);
+               return 0;
+       }
+       #endif
+ */
+
+/* Make it "#if 1" for a "debug mode", that helps in regenerating tables,
+ * or temporary state while implementing optimized port specific versions.
+ *
+ * The debug main walks a single set bit (v0) down from the top bit and an
+ * all ones pattern (v2) left, one position per iteration, and prints for
+ * each step the clz, clo, ctz and cto results together with the operand
+ * in hexadecimal and in binary.  With CALL_FUNC set to 1 the results come
+ * from the clz/clo/ctz/cto functions defined in this file; otherwise from
+ * the clzr/clor/ctzr/ctor instructions. */
+#if 0
+#define CALL_FUNC      1
+       name main
+main:
+       prolog
+       /* 80 byte stack buffer for the binary string built by bitsprint */
+       allocai 80 $buf
+#if __WORDSIZE == 64
+       movi %v0 0x8000000000000000
+       movi %v2 0xffffffffffffffff
+#else
+       movi %v0 0x80000000
+       movi %v2 0xffffffff
+#endif
+loop:
+       /* v1 = all bits below the single bit in v0; operand of cto */
+       subi %v1 %v0 1
+       /* clz of v0 */
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v0
+       finishi bitsprint
+#if CALL_FUNC
+       prepare
+               pushargr %v0
+       finishi clz
+       retval %r0
+#else
+       clzr %r0 %v0
+#endif
+       /* r1 is recomputed because it was last set before the calls above */
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_clz
+               pushargr %v0
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       /* clo of v2 */
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v2
+       finishi bitsprint
+#if CALL_FUNC
+       prepare
+               pushargr %v2
+       finishi clo
+       retval %r0
+#else
+       clor %r0 %v2
+#endif
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_clo
+               pushargr %v2
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       /* ctz of v0 */
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v0
+       finishi bitsprint
+#if CALL_FUNC
+       prepare
+               pushargr %v0
+       finishi ctz
+       retval %r0
+#else
+       ctzr %r0 %v0
+#endif
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_ctz
+               pushargr %v0
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       /* cto of v1 */
+       addi %r1 %fp $buf
+       prepare
+               pushargr %r1
+               pushargr %v1
+       finishi bitsprint
+#if CALL_FUNC
+       prepare
+               pushargr %v1
+       finishi cto
+       retval %r0
+#else
+       ctor %r0 %v1
+#endif
+       addi %r1 %fp $buf
+       prepare
+               pushargi print_fmt
+               ellipsis
+               pushargi str_cto
+               pushargr %v1
+               pushargr %r1
+               pushargr %r0
+       finishi @printf
+       /* next step: single bit one position lower, ones pattern one shorter */
+       rshi_u %v0 %v0 1
+       lshi %v2 %v2 1
+       /* v1 becomes -1 in the iteration where v0 is zero; that iteration is
+        * the last one */
+       bgti loop %v1 -1
+       ret
+       epilog
+#else
+
+/* Table driven test: for every operand below, the CLZ, CLO, CTZ and CTO
+ * macros check the corresponding instruction against the expected count
+ * and call abort() on a mismatch.  When every check has passed, "ok" is
+ * printed and 0 is returned. */
+       name main
+main:
+       prolog
+#if __WORDSIZE == 32
+       CLZ(0x80000000, 0)
+       CLO(0xffffffff, 32)
+       CTZ(0x80000000, 31)
+       CTO(0x7fffffff, 31)
+       CLZ(0x40000000, 1)
+       CLO(0xfffffffe, 31)
+       CTZ(0x40000000, 30)
+       CTO(0x3fffffff, 30)
+       CLZ(0x20000000, 2)
+       CLO(0xfffffffc, 30)
+       CTZ(0x20000000, 29)
+       CTO(0x1fffffff, 29)
+       CLZ(0x10000000, 3)
+       CLO(0xfffffff8, 29)
+       CTZ(0x10000000, 28)
+       CTO(0x0fffffff, 28)
+       CLZ(0x08000000, 4)
+       CLO(0xfffffff0, 28)
+       CTZ(0x08000000, 27)
+       CTO(0x07ffffff, 27)
+       CLZ(0x04000000, 5)
+       CLO(0xffffffe0, 27)
+       CTZ(0x04000000, 26)
+       CTO(0x03ffffff, 26)
+       CLZ(0x02000000, 6)
+       CLO(0xffffffc0, 26)
+       CTZ(0x02000000, 25)
+       CTO(0x01ffffff, 25)
+       CLZ(0x01000000, 7)
+       CLO(0xffffff80, 25)
+       CTZ(0x01000000, 24)
+       CTO(0x00ffffff, 24)
+       CLZ(0x00800000, 8)
+       CLO(0xffffff00, 24)
+       CTZ(0x00800000, 23)
+       CTO(0x007fffff, 23)
+       CLZ(0x00400000, 9)
+       CLO(0xfffffe00, 23)
+       CTZ(0x00400000, 22)
+       CTO(0x003fffff, 22)
+       CLZ(0x00200000, 10)
+       CLO(0xfffffc00, 22)
+       CTZ(0x00200000, 21)
+       CTO(0x001fffff, 21)
+       CLZ(0x00100000, 11)
+       CLO(0xfffff800, 21)
+       CTZ(0x00100000, 20)
+       CTO(0x000fffff, 20)
+       CLZ(0x00080000, 12)
+       CLO(0xfffff000, 20)
+       CTZ(0x00080000, 19)
+       CTO(0x0007ffff, 19)
+       CLZ(0x00040000, 13)
+       CLO(0xffffe000, 19)
+       CTZ(0x00040000, 18)
+       CTO(0x0003ffff, 18)
+       CLZ(0x00020000, 14)
+       CLO(0xffffc000, 18)
+       CTZ(0x00020000, 17)
+       CTO(0x0001ffff, 17)
+       CLZ(0x00010000, 15)
+       CLO(0xffff8000, 17)
+       CTZ(0x00010000, 16)
+       CTO(0x0000ffff, 16)
+       CLZ(0x00008000, 16)
+       CLO(0xffff0000, 16)
+       CTZ(0x00008000, 15)
+       CTO(0x00007fff, 15)
+       CLZ(0x00004000, 17)
+       CLO(0xfffe0000, 15)
+       CTZ(0x00004000, 14)
+       CTO(0x00003fff, 14)
+       CLZ(0x00002000, 18)
+       CLO(0xfffc0000, 14)
+       CTZ(0x00002000, 13)
+       CTO(0x00001fff, 13)
+       CLZ(0x00001000, 19)
+       CLO(0xfff80000, 13)
+       CTZ(0x00001000, 12)
+       CTO(0x00000fff, 12)
+       CLZ(0x00000800, 20)
+       CLO(0xfff00000, 12)
+       CTZ(0x00000800, 11)
+       CTO(0x000007ff, 11)
+       CLZ(0x00000400, 21)
+       CLO(0xffe00000, 11)
+       CTZ(0x00000400, 10)
+       CTO(0x000003ff, 10)
+       CLZ(0x00000200, 22)
+       CLO(0xffc00000, 10)
+       CTZ(0x00000200, 9)
+       CTO(0x000001ff, 9)
+       CLZ(0x00000100, 23)
+       CLO(0xff800000, 9)
+       CTZ(0x00000100, 8)
+       CTO(0x000000ff, 8)
+       CLZ(0x00000080, 24)
+       CLO(0xff000000, 8)
+       CTZ(0x00000080, 7)
+       CTO(0x0000007f, 7)
+       CLZ(0x00000040, 25)
+       CLO(0xfe000000, 7)
+       CTZ(0x00000040, 6)
+       CTO(0x0000003f, 6)
+       CLZ(0x00000020, 26)
+       CLO(0xfc000000, 6)
+       CTZ(0x00000020, 5)
+       CTO(0x0000001f, 5)
+       CLZ(0x00000010, 27)
+       CLO(0xf8000000, 5)
+       CTZ(0x00000010, 4)
+       CTO(0x0000000f, 4)
+       CLZ(0x00000008, 28)
+       CLO(0xf0000000, 4)
+       CTZ(0x00000008, 3)
+       CTO(0x00000007, 3)
+       CLZ(0x00000004, 29)
+       CLO(0xe0000000, 3)
+       CTZ(0x00000004, 2)
+       CTO(0x00000003, 2)
+       CLZ(0x00000002, 30)
+       CLO(0xc0000000, 2)
+       CTZ(0x00000002, 1)
+       CTO(0x00000001, 1)
+       CLZ(0x00000001, 31)
+       CLO(0x80000000, 1)
+       CTZ(0x00000001, 0)
+       CTO(0x00000000, 0)
+       CLZ(0x00000000, 32)
+       CLO(0x00000000, 0)
+       CTZ(0x00000000, 32)
+       CTO(0xffffffff, 32)
+#else
+       CLZ(0x8000000000000000, 0)
+       CLO(0xffffffffffffffff, 64)
+       CTZ(0x8000000000000000, 63)
+       CTO(0x7fffffffffffffff, 63)
+       CLZ(0x4000000000000000, 1)
+       CLO(0xfffffffffffffffe, 63)
+       CTZ(0x4000000000000000, 62)
+       CTO(0x3fffffffffffffff, 62)
+       CLZ(0x2000000000000000, 2)
+       CLO(0xfffffffffffffffc, 62)
+       CTZ(0x2000000000000000, 61)
+       CTO(0x1fffffffffffffff, 61)
+       CLZ(0x1000000000000000, 3)
+       CLO(0xfffffffffffffff8, 61)
+       CTZ(0x1000000000000000, 60)
+       CTO(0x0fffffffffffffff, 60)
+       CLZ(0x0800000000000000, 4)
+       CLO(0xfffffffffffffff0, 60)
+       CTZ(0x0800000000000000, 59)
+       CTO(0x07ffffffffffffff, 59)
+       CLZ(0x0400000000000000, 5)
+       CLO(0xffffffffffffffe0, 59)
+       CTZ(0x0400000000000000, 58)
+       CTO(0x03ffffffffffffff, 58)
+       CLZ(0x0200000000000000, 6)
+       CLO(0xffffffffffffffc0, 58)
+       CTZ(0x0200000000000000, 57)
+       CTO(0x01ffffffffffffff, 57)
+       CLZ(0x0100000000000000, 7)
+       CLO(0xffffffffffffff80, 57)
+       CTZ(0x0100000000000000, 56)
+       CTO(0x00ffffffffffffff, 56)
+       CLZ(0x0080000000000000, 8)
+       CLO(0xffffffffffffff00, 56)
+       CTZ(0x0080000000000000, 55)
+       CTO(0x007fffffffffffff, 55)
+       CLZ(0x0040000000000000, 9)
+       CLO(0xfffffffffffffe00, 55)
+       CTZ(0x0040000000000000, 54)
+       CTO(0x003fffffffffffff, 54)
+       CLZ(0x0020000000000000, 10)
+       CLO(0xfffffffffffffc00, 54)
+       CTZ(0x0020000000000000, 53)
+       CTO(0x001fffffffffffff, 53)
+       CLZ(0x0010000000000000, 11)
+       CLO(0xfffffffffffff800, 53)
+       CTZ(0x0010000000000000, 52)
+       CTO(0x000fffffffffffff, 52)
+       CLZ(0x0008000000000000, 12)
+       CLO(0xfffffffffffff000, 52)
+       CTZ(0x0008000000000000, 51)
+       CTO(0x0007ffffffffffff, 51)
+       CLZ(0x0004000000000000, 13)
+       CLO(0xffffffffffffe000, 51)
+       CTZ(0x0004000000000000, 50)
+       CTO(0x0003ffffffffffff, 50)
+       CLZ(0x0002000000000000, 14)
+       CLO(0xffffffffffffc000, 50)
+       CTZ(0x0002000000000000, 49)
+       CTO(0x0001ffffffffffff, 49)
+       CLZ(0x0001000000000000, 15)
+       CLO(0xffffffffffff8000, 49)
+       CTZ(0x0001000000000000, 48)
+       CTO(0x0000ffffffffffff, 48)
+       CLZ(0x0000800000000000, 16)
+       CLO(0xffffffffffff0000, 48)
+       CTZ(0x0000800000000000, 47)
+       CTO(0x00007fffffffffff, 47)
+       CLZ(0x0000400000000000, 17)
+       CLO(0xfffffffffffe0000, 47)
+       CTZ(0x0000400000000000, 46)
+       CTO(0x00003fffffffffff, 46)
+       CLZ(0x0000200000000000, 18)
+       CLO(0xfffffffffffc0000, 46)
+       CTZ(0x0000200000000000, 45)
+       CTO(0x00001fffffffffff, 45)
+       CLZ(0x0000100000000000, 19)
+       CLO(0xfffffffffff80000, 45)
+       CTZ(0x0000100000000000, 44)
+       CTO(0x00000fffffffffff, 44)
+       CLZ(0x0000080000000000, 20)
+       CLO(0xfffffffffff00000, 44)
+       CTZ(0x0000080000000000, 43)
+       CTO(0x000007ffffffffff, 43)
+       CLZ(0x0000040000000000, 21)
+       CLO(0xffffffffffe00000, 43)
+       CTZ(0x0000040000000000, 42)
+       CTO(0x000003ffffffffff, 42)
+       CLZ(0x0000020000000000, 22)
+       CLO(0xffffffffffc00000, 42)
+       CTZ(0x0000020000000000, 41)
+       CTO(0x000001ffffffffff, 41)
+       CLZ(0x0000010000000000, 23)
+       CLO(0xffffffffff800000, 41)
+       CTZ(0x0000010000000000, 40)
+       CTO(0x000000ffffffffff, 40)
+       CLZ(0x0000008000000000, 24)
+       CLO(0xffffffffff000000, 40)
+       CTZ(0x0000008000000000, 39)
+       CTO(0x0000007fffffffff, 39)
+       CLZ(0x0000004000000000, 25)
+       CLO(0xfffffffffe000000, 39)
+       CTZ(0x0000004000000000, 38)
+       CTO(0x0000003fffffffff, 38)
+       CLZ(0x0000002000000000, 26)
+       CLO(0xfffffffffc000000, 38)
+       CTZ(0x0000002000000000, 37)
+       CTO(0x0000001fffffffff, 37)
+       CLZ(0x0000001000000000, 27)
+       CLO(0xfffffffff8000000, 37)
+       CTZ(0x0000001000000000, 36)
+       CTO(0x0000000fffffffff, 36)
+       CLZ(0x0000000800000000, 28)
+       CLO(0xfffffffff0000000, 36)
+       CTZ(0x0000000800000000, 35)
+       CTO(0x00000007ffffffff, 35)
+       CLZ(0x0000000400000000, 29)
+       CLO(0xffffffffe0000000, 35)
+       CTZ(0x0000000400000000, 34)
+       CTO(0x00000003ffffffff, 34)
+       CLZ(0x0000000200000000, 30)
+       CLO(0xffffffffc0000000, 34)
+       CTZ(0x0000000200000000, 33)
+       CTO(0x00000001ffffffff, 33)
+       CLZ(0x0000000100000000, 31)
+       CLO(0xffffffff80000000, 33)
+       CTZ(0x0000000100000000, 32)
+       CTO(0x00000000ffffffff, 32)
+       CLZ(0x0000000080000000, 32)
+       CLO(0xffffffff00000000, 32)
+       CTZ(0x0000000080000000, 31)
+       CTO(0x000000007fffffff, 31)
+       CLZ(0x0000000040000000, 33)
+       CLO(0xfffffffe00000000, 31)
+       CTZ(0x0000000040000000, 30)
+       CTO(0x000000003fffffff, 30)
+       CLZ(0x0000000020000000, 34)
+       CLO(0xfffffffc00000000, 30)
+       CTZ(0x0000000020000000, 29)
+       CTO(0x000000001fffffff, 29)
+       CLZ(0x0000000010000000, 35)
+       CLO(0xfffffff800000000, 29)
+       CTZ(0x0000000010000000, 28)
+       CTO(0x000000000fffffff, 28)
+       CLZ(0x0000000008000000, 36)
+       CLO(0xfffffff000000000, 28)
+       CTZ(0x0000000008000000, 27)
+       CTO(0x0000000007ffffff, 27)
+       CLZ(0x0000000004000000, 37)
+       CLO(0xffffffe000000000, 27)
+       CTZ(0x0000000004000000, 26)
+       CTO(0x0000000003ffffff, 26)
+       CLZ(0x0000000002000000, 38)
+       CLO(0xffffffc000000000, 26)
+       CTZ(0x0000000002000000, 25)
+       CTO(0x0000000001ffffff, 25)
+       CLZ(0x0000000001000000, 39)
+       CLO(0xffffff8000000000, 25)
+       CTZ(0x0000000001000000, 24)
+       CTO(0x0000000000ffffff, 24)
+       CLZ(0x0000000000800000, 40)
+       CLO(0xffffff0000000000, 24)
+       CTZ(0x0000000000800000, 23)
+       CTO(0x00000000007fffff, 23)
+       CLZ(0x0000000000400000, 41)
+       CLO(0xfffffe0000000000, 23)
+       CTZ(0x0000000000400000, 22)
+       CTO(0x00000000003fffff, 22)
+       CLZ(0x0000000000200000, 42)
+       CLO(0xfffffc0000000000, 22)
+       CTZ(0x0000000000200000, 21)
+       CTO(0x00000000001fffff, 21)
+       CLZ(0x0000000000100000, 43)
+       CLO(0xfffff80000000000, 21)
+       CTZ(0x0000000000100000, 20)
+       CTO(0x00000000000fffff, 20)
+       CLZ(0x0000000000080000, 44)
+       CLO(0xfffff00000000000, 20)
+       CTZ(0x0000000000080000, 19)
+       CTO(0x000000000007ffff, 19)
+       CLZ(0x0000000000040000, 45)
+       CLO(0xffffe00000000000, 19)
+       CTZ(0x0000000000040000, 18)
+       CTO(0x000000000003ffff, 18)
+       CLZ(0x0000000000020000, 46)
+       CLO(0xffffc00000000000, 18)
+       CTZ(0x0000000000020000, 17)
+       CTO(0x000000000001ffff, 17)
+       CLZ(0x0000000000010000, 47)
+       CLO(0xffff800000000000, 17)
+       CTZ(0x0000000000010000, 16)
+       CTO(0x000000000000ffff, 16)
+       CLZ(0x0000000000008000, 48)
+       CLO(0xffff000000000000, 16)
+       CTZ(0x0000000000008000, 15)
+       CTO(0x0000000000007fff, 15)
+       CLZ(0x0000000000004000, 49)
+       CLO(0xfffe000000000000, 15)
+       CTZ(0x0000000000004000, 14)
+       CTO(0x0000000000003fff, 14)
+       CLZ(0x0000000000002000, 50)
+       CLO(0xfffc000000000000, 14)
+       CTZ(0x0000000000002000, 13)
+       CTO(0x0000000000001fff, 13)
+       CLZ(0x0000000000001000, 51)
+       CLO(0xfff8000000000000, 13)
+       CTZ(0x0000000000001000, 12)
+       CTO(0x0000000000000fff, 12)
+       CLZ(0x0000000000000800, 52)
+       CLO(0xfff0000000000000, 12)
+       CTZ(0x0000000000000800, 11)
+       CTO(0x00000000000007ff, 11)
+       CLZ(0x0000000000000400, 53)
+       CLO(0xffe0000000000000, 11)
+       CTZ(0x0000000000000400, 10)
+       CTO(0x00000000000003ff, 10)
+       CLZ(0x0000000000000200, 54)
+       CLO(0xffc0000000000000, 10)
+       CTZ(0x0000000000000200, 9)
+       CTO(0x00000000000001ff, 9)
+       CLZ(0x0000000000000100, 55)
+       CLO(0xff80000000000000, 9)
+       CTZ(0x0000000000000100, 8)
+       CTO(0x00000000000000ff, 8)
+       CLZ(0x0000000000000080, 56)
+       CLO(0xff00000000000000, 8)
+       CTZ(0x0000000000000080, 7)
+       CTO(0x000000000000007f, 7)
+       CLZ(0x0000000000000040, 57)
+       CLO(0xfe00000000000000, 7)
+       CTZ(0x0000000000000040, 6)
+       CTO(0x000000000000003f, 6)
+       CLZ(0x0000000000000020, 58)
+       CLO(0xfc00000000000000, 6)
+       CTZ(0x0000000000000020, 5)
+       CTO(0x000000000000001f, 5)
+       CLZ(0x0000000000000010, 59)
+       CLO(0xf800000000000000, 5)
+       CTZ(0x0000000000000010, 4)
+       CTO(0x000000000000000f, 4)
+       CLZ(0x0000000000000008, 60)
+       CLO(0xf000000000000000, 4)
+       CTZ(0x0000000000000008, 3)
+       CTO(0x0000000000000007, 3)
+       CLZ(0x0000000000000004, 61)
+       CLO(0xe000000000000000, 3)
+       CTZ(0x0000000000000004, 2)
+       CTO(0x0000000000000003, 2)
+       CLZ(0x0000000000000002, 62)
+       CLO(0xc000000000000000, 2)
+       CTZ(0x0000000000000002, 1)
+       CTO(0x0000000000000001, 1)
+       CLZ(0x0000000000000001, 63)
+       CLO(0x8000000000000000, 1)
+       CTZ(0x0000000000000001, 0)
+       CTO(0x0000000000000000, 0)
+       CLZ(0x0000000000000000, 64)
+       CLO(0x0000000000000000, 0)
+       CTZ(0x0000000000000000, 64)
+       CTO(0xffffffffffffffff, 64)
+#endif
+       /* printf is variadic: mark the end of the fixed arguments, as the
+        * printf calls in the debug version of main do */
+       prepare
+               pushargi ok
+               ellipsis
+       finishi @printf
+       reti 0
+       epilog
+#endif
index 21068b6..40fb041 100644 (file)
@@ -1,10 +1,10 @@
-#define def_wi(i)                      \
+#define def_wi(i, ii)                  \
        name _w##i                      \
 _w##i:                                 \
        prolog                          \
        name _w##i                      \
 _w##i:                                 \
        prolog                          \
-       arg $arg##i                     \
+       arg##ii $arg##i                 \
        getarg##i %r0 $arg##i           \
        getarg##i %r0 $arg##i           \
-       retr %r0                        \
+       retr##i %r0                     \
        epilog
 #define def_wf(f)                      \
        name _w##f                      \
        epilog
 #define def_wf(f)                      \
        name _w##f                      \
@@ -15,11 +15,11 @@ _w##f:                                      \
        truncr##f %r0 %f0               \
        retr %r0                        \
        epilog
        truncr##f %r0 %f0               \
        retr %r0                        \
        epilog
-#define def_fi(f, i)                   \
+#define def_fi(f, i, ii)               \
        name f##i                       \
 f##i:                                  \
        prolog                          \
        name f##i                       \
 f##i:                                  \
        prolog                          \
-       arg $arg##i                     \
+       arg##ii $arg##i                 \
        getarg##i %r0 $arg##i           \
        extr##f %f0 %r0                 \
        retr##f %f0                     \
        getarg##i %r0 $arg##i           \
        extr##f %f0 %r0                 \
        retr##f %f0                     \
@@ -52,33 +52,33 @@ bstr:
 .code
        jmpi main
 
 .code
        jmpi main
 
-       def_wi(_c)
-       def_wi(_uc)
-       def_wi(_s)
-       def_wi(_us)
+       def_wi(_c, _c)
+       def_wi(_uc, _c)
+       def_wi(_s, _s)
+       def_wi(_us, _s)
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       def_wi(_i)
-       def_wi(_ui)
+       def_wi(_i, _i)
+       def_wi(_ui, _i)
 #endif
        def_wf(_f)
        def_wf(_d)
 #endif
        def_wf(_f)
        def_wf(_d)
-       def_fi(_f, _c)
-       def_fi(_f, _uc)
-       def_fi(_f, _s)
-       def_fi(_f, _us)
-       def_fi(_f, _i)
+       def_fi(_f, _c, _c)
+       def_fi(_f, _uc, _c)
+       def_fi(_f, _s, _s)
+       def_fi(_f, _us, _s)
+       def_fi(_f, _i, _i)
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       def_fi(_f, _ui)
-       def_fi(_f, _l)
+       def_fi(_f, _ui, _i)
+       def_fi(_f, _l, _l)
 #endif
 #endif
-       def_fi(_d, _c)
-       def_fi(_d, _uc)
-       def_fi(_d, _s)
-       def_fi(_d, _us)
-       def_fi(_d, _i)
+       def_fi(_d, _c, _c)
+       def_fi(_d, _uc, _c)
+       def_fi(_d, _s, _s)
+       def_fi(_d, _us, _s)
+       def_fi(_d, _i, _i)
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       def_fi(_d, _ui)
-       def_fi(_d, _l)
+       def_fi(_d, _ui, _i)
+       def_fi(_d, _l, _l)
 #endif
        def_f(_f)
        def_f(_d)
 #endif
        def_f(_f)
        def_f(_d)
@@ -91,7 +91,7 @@ main:
 
 #define _call_w(n, i, a, r)            \
        prepare                         \
 
 #define _call_w(n, i, a, r)            \
        prepare                         \
-               pushargi a              \
+               pushargi##i a           \
        finishi _w##i                   \
        retval %r0                      \
        extr##i %r0 %r0                 \
        finishi _w##i                   \
        retval %r0                      \
        extr##i %r0 %r0                 \
@@ -111,7 +111,7 @@ _w##f##_##n:
 #define call_wf(n, f, a, r)            _call_wf(n, f, a, r)
 #define _call_fi(n, f, i, a, r)                \
        prepare                         \
 #define call_wf(n, f, a, r)            _call_wf(n, f, a, r)
 #define _call_fi(n, f, i, a, r)                \
        prepare                         \
-               pushargi a              \
+               pushargi##i a           \
        finishi f##i                    \
        retval##f %f0                   \
        beqi##f f##i##n %f0 r           \
        finishi f##i                    \
        retval##f %f0                   \
        beqi##f f##i##n %f0 r           \
@@ -196,6 +196,7 @@ f##g##n:
        call_wf(__LINE__, _d, c7f, f7f)
        call_wf(__LINE__, _d, wc80, f80)
        call_wf(__LINE__, _d, wc81, f81)
        call_wf(__LINE__, _d, c7f, f7f)
        call_wf(__LINE__, _d, wc80, f80)
        call_wf(__LINE__, _d, wc81, f81)
+
        call_fi(__LINE__, _f, _c, c7f, f7f)
        call_fi(__LINE__, _f, _c, c80, f80)
        call_fi(__LINE__, _f, _uc, c7f, f7f)
        call_fi(__LINE__, _f, _c, c7f, f7f)
        call_fi(__LINE__, _f, _c, c80, f80)
        call_fi(__LINE__, _f, _uc, c7f, f7f)
index 35b897e..6992db4 100644 (file)
@@ -58,8 +58,8 @@ int
 main(int argc, char *argv[])
 {
     void               (*code)(void);
 main(int argc, char *argv[])
 {
     void               (*code)(void);
-    jit_node_t         *jmp, *pass;
-    jit_node_t          *jw,  *jf,  *jd;
+    jit_node_t         *jmp, *pass, *fail;
+    jit_node_t          *jw, *jf, *jd;
     jit_int32_t                  s1,   s2,   s3,   s4,   s5,   s6,   s7,   s8,
                          s9,  s10,  s11,  s12,  s13,  s14,  s15,  s16;
     jit_node_t          *a1,  *a2,  *a3,  *a4,  *a5,  *a6,  *a7,  *a8,
     jit_int32_t                  s1,   s2,   s3,   s4,   s5,   s6,   s7,   s8,
                          s9,  s10,  s11,  s12,  s13,  s14,  s15,  s16;
     jit_node_t          *a1,  *a2,  *a3,  *a4,  *a5,  *a6,  *a7,  *a8,
@@ -172,10 +172,11 @@ main(int argc, char *argv[])
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
+    fail = jit_forward();
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg(JIT_R0, a##N);                                       \
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg(JIT_R0, a##N);                                       \
-       jit_patch_at(jit_beqi(JIT_R0, 17 - N), pass);                   \
+       jit_patch_at(jit_bnei(JIT_R0, 17 - N), fail);                   \
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
@@ -194,6 +195,8 @@ main(int argc, char *argv[])
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
+    jit_patch_at(jit_jmpi(), pass);
+    jit_link(fail);
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
@@ -300,10 +303,11 @@ main(int argc, char *argv[])
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
+    fail = jit_forward();
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg_f(JIT_F0, a##N);                                     \
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg_f(JIT_F0, a##N);                                     \
-       jit_patch_at(jit_beqi_f(JIT_F0, 17 - N), pass);                 \
+       jit_patch_at(jit_bnei_f(JIT_F0, 17 - N), fail);                 \
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
@@ -322,6 +326,8 @@ main(int argc, char *argv[])
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
+    jit_patch_at(jit_jmpi(), pass);
+    jit_link(fail);
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
@@ -428,10 +434,11 @@ main(int argc, char *argv[])
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
     LOAD_ARG(16);
 #undef LOAD_ARG
     pass = jit_forward();
+    fail = jit_forward();
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg_d(JIT_F0, a##N);                                     \
 #define CHECK_ARG(N)                                                   \
     do {                                                               \
        jit_getarg_d(JIT_F0, a##N);                                     \
-       jit_patch_at(jit_beqi_d(JIT_F0, 17 - N), pass);                 \
+       jit_patch_at(jit_bnei_d(JIT_F0, 17 - N), fail);                 \
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
     } while (0)
     CHECK_ARG(1);
     CHECK_ARG(2);
@@ -450,6 +457,8 @@ main(int argc, char *argv[])
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
     CHECK_ARG(15);
     CHECK_ARG(16);
 #undef CHECK_ARG
+    jit_patch_at(jit_jmpi(), pass);
+    jit_link(fail);
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
     jit_calli(abort);
     jit_link(pass);
     jit_ret();
@@ -484,6 +493,7 @@ main(int argc, char *argv[])
        jit_pushargi(1);
     }
     jit_patch_at(jit_finishi(NULL), jw);
        jit_pushargi(1);
     }
     jit_patch_at(jit_finishi(NULL), jw);
+
     jit_prepare();
     {
        jit_pushargi_f(16);
     jit_prepare();
     {
        jit_pushargi_f(16);
@@ -504,6 +514,7 @@ main(int argc, char *argv[])
        jit_pushargi_f(1);
     }
     jit_patch_at(jit_finishi(NULL), jf);
        jit_pushargi_f(1);
     }
     jit_patch_at(jit_finishi(NULL), jf);
+
     jit_prepare();
     {
        jit_pushargi_d(16);
     jit_prepare();
     {
        jit_pushargi_d(16);
index ef09076..e1e2ea7 100644 (file)
@@ -150,7 +150,10 @@ main(int argc, char *argv[])
 #define join(tid)                                              \
     /* load pthread_t value in JIT_R0 */                       \
     jit_movi(JIT_R0, (jit_word_t)tids);                                \
 #define join(tid)                                              \
     /* load pthread_t value in JIT_R0 */                       \
     jit_movi(JIT_R0, (jit_word_t)tids);                                \
-    jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t));         \
+    if (__WORDSIZE == 64 && sizeof(pthread_t) == 4)            \
+       jit_ldxi_i(JIT_R0, JIT_R0, tid * sizeof(pthread_t));    \
+    else                                                       \
+       jit_ldxi(JIT_R0, JIT_R0, tid * sizeof(pthread_t));      \
     jit_prepare();                                             \
     jit_pushargr(JIT_R0);                                      \
     jit_pushargi((jit_word_t)NULL);                            \
     jit_prepare();                                             \
     jit_pushargr(JIT_R0);                                      \
     jit_pushargi((jit_word_t)NULL);                            \
index 9dae256..3491f2e 100644 (file)
 #  define _l15                 _w15
 #endif
 
 #  define _l15                 _w15
 #endif
 
+#ifndef jit_arg_uc
+#  define jit_arg_uc           jit_arg_c
+#endif
+#ifndef jit_arg_us
+#  define jit_arg_us           jit_arg_s
+#endif
+#ifndef jit_arg_ui
+#  define jit_arg_ui           jit_arg_i
+#endif
+
 /*
  * Types
  */
 /*
  * Types
  */
@@ -624,7 +634,7 @@ main(int argc, char *argv[])
 #define arg15(T)               arg14(T)                a15 = jit_arg##T();
 
 #define get0(B,T,R)            jit_movi##B(R##0,0);
 #define arg15(T)               arg14(T)                a15 = jit_arg##T();
 
 #define get0(B,T,R)            jit_movi##B(R##0,0);
-#define get1(B,T,R)            jit_getarg##B(R##0,a##1);
+#define get1(B,T,R)            jit_getarg##T(R##0,a##1);
 #define get2(B,T,R)                                                    \
        get1(B,T,R);                                                    \
        jit_movr##B(R##1, R##0);                                        \
 #define get2(B,T,R)                                                    \
        get1(B,T,R);                                                    \
        jit_movr##B(R##1, R##0);                                        \
@@ -707,7 +717,7 @@ main(int argc, char *argv[])
     n##T##N = jit_name(strfy(n##T##N));                                        \
     jit_note("ccall.c", __LINE__);                                     \
     jit_prolog();                                                      \
     n##T##N = jit_name(strfy(n##T##N));                                        \
     jit_note("ccall.c", __LINE__);                                     \
     jit_prolog();                                                      \
-    arg##N();                                                          \
+    arg##N(T);                                                         \
     get##N(,T,JIT_R)                                                   \
     jit_extr##T(JIT_R0, JIT_R0);                                       \
     jit_retr(JIT_R0);                                                  \
     get##N(,T,JIT_R)                                                   \
     jit_extr##T(JIT_R0, JIT_R0);                                       \
     jit_retr(JIT_R0);                                                  \
@@ -777,7 +787,7 @@ main(int argc, char *argv[])
 
 #define calin(T,N)                                                     \
        jit_prepare();                                                  \
 
 #define calin(T,N)                                                     \
        jit_prepare();                                                  \
-               push##N(                                              \
+               push##N(T)                                              \
        jit_finishi(C##T##N);                                           \
        jit_retval##T(JIT_R0);                                          \
        jmp = jit_beqi(JIT_R0, T##N);                                   \
        jit_finishi(C##T##N);                                           \
        jit_retval##T(JIT_R0);                                          \
        jmp = jit_beqi(JIT_R0, T##N);                                   \
@@ -826,7 +836,7 @@ main(int argc, char *argv[])
 #undef calfn
 #define calin(T,N)                                                     \
        jit_prepare();                                                  \
 #undef calfn
 #define calin(T,N)                                                     \
        jit_prepare();                                                  \
-               push##N(                                              \
+               push##N(T)                                              \
        jit_finishi(CJ##T##N);                                          \
        jit_retval##T(JIT_R0);                                          \
        jmp = jit_beqi(JIT_R0, T##N);                                   \
        jit_finishi(CJ##T##N);                                          \
        jit_retval##T(JIT_R0);                                          \
        jmp = jit_beqi(JIT_R0, T##N);                                   \
diff --git a/deps/lightning/check/factorial.tst b/deps/lightning/check/factorial.tst
new file mode 100644 (file)
index 0000000..68adbb2
--- /dev/null
@@ -0,0 +1,73 @@
+.data  32
+str:
+.c     "%.0lf\n"
+.code
+       jmpi main               /* jump over factorial's body to the main label */
+/*
+ *     double factorial(unsigned long n) {
+ *             double r = 1;
+ *             while (n > 1) {
+ *                     r *= n;
+ *                     --n;
+ *             }
+ *             return r;
+ *     }
+ */
+factorial:
+       prolog
+       arg $n
+       getarg %r0 $n
+       movi_d %f0 1.0          /* %f0 is r, starting at 1.0 */
+       extr_d %f1 %r0          /* %f1 = n converted to double */
+       movr_d %f2 %f0          /* %f2 = 1.0, used as both loop bound and decrement */
+loop:
+       bltr_d done %f1 %f2     /* leave once %f1 < 1.0; NOTE(review): the n == 1 pass still runs (r *= 1.0), unlike the sketch's n > 1, result unchanged */
+       mulr_d %f0 %f0 %f1      /* r *= n */
+       subr_d %f1 %f1 %f2      /* --n */
+       jmpi loop
+done:
+       retr_d %f0              /* return r */
+       epilog
+
+/*
+ *     int main(int argc, char *argv[]) {
+ *             unsigned long v;
+ *             double d;
+ *             if (argc == 2)
+ *                     v = strtoul(argv[1], NULL, 0);
+ *             else
+ *                     v = 32;
+ *             d = factorial(v);
+ *             printf("%.0lf\n", d);
+ *             return 0;
+ *     }
+ */
+main:
+       prolog
+       arg $argc
+       arg $argv
+       getarg %r0 $argc
+       bnei default %r0 2      /* argc != 2: use the built-in input */
+       getarg %v0 $argv
+       ldxi %r0 %v0 $(__WORDSIZE >> 3) /* %r0 = argv[1], one word past argv[0] */
+       prepare
+               pushargr %r0    /* string to parse */
+               pushargi 0      /* second argument 0: NULL in the sketch above */
+               pushargi 0      /* third argument 0: the base in the sketch above */
+       finishi @strtoul
+       retval %v0              /* v = parsed value */
+       jmpi call
+default:
+    movi %v0 32                /* v = 32 */
+call:
+       prepare
+               pushargr %v0
+       finishi factorial
+       retval_d %f0            /* d = factorial(v) */
+       prepare
+               pushargi str    /* format string defined in the data section */
+               ellipsis        /* presumably marks the following arguments as variadic; confirm against the lightning manual */
+               pushargr_d %f0
+       finishi @printf
+       reti 0
+       epilog
index 0835323..926ee81 100644 (file)
@@ -32,7 +32,7 @@ main:
        arg $argc
        arg $argv
 
        arg $argc
        arg $argv
 
-       getarg_i %r0 $argc
+       getarg %r0 $argc
        blei default %r0 1
        getarg %r0 $argv
        addi %r0 %r0 $(__WORDSIZE >> 3)
        blei default %r0 1
        getarg %r0 $argv
        addi %r0 %r0 $(__WORDSIZE >> 3)
index 05a0889..69a6caf 100644 (file)
@@ -14,9 +14,9 @@ ok:
 #  define x80                  0x8000000000000000
 #endif
 
 #  define x80                  0x8000000000000000
 #endif
 
-#if __mips__ || __sparc__ || __hppa__ || __riscv
+#if (__mips__ && __mips_isa_rev < 6)  || __sparc__ || __hppa__ || __riscv
 #  define wnan                 x7f
 #  define wnan                 x7f
-#elif __arm__ || __aarch64__ || __alpha__ || __loongarch__
+#elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || __aarch64__ || __alpha__ || __loongarch__
 #  define wnan                 0
 #else
 #  define wnan                 x80
 #  define wnan                 0
 #else
 #  define wnan                 x80
index 4f3b052..80ea081 100644 (file)
@@ -270,10 +270,16 @@ static jit_pointer_t get_arg(void);
 static jit_word_t get_imm(void);
 static void live(void);
 static void align(void);       static void name(void);
 static jit_word_t get_imm(void);
 static void live(void);
 static void align(void);       static void name(void);
+static void skip(void);
 static void prolog(void);
 static void frame(void);       static void tramp(void);
 static void ellipsis(void);
 static void allocai(void);     static void allocar(void);
 static void prolog(void);
 static void frame(void);       static void tramp(void);
 static void ellipsis(void);
 static void allocai(void);     static void allocar(void);
+static void arg_c(void);       static void arg_s(void);
+static void arg_i(void);
+#if __WORDSIZE == 64
+static void arg_l(void);
+#endif
 static void arg(void);
 static void getarg_c(void);    static void getarg_uc(void);
 static void getarg_s(void);    static void getarg_us(void);
 static void arg(void);
 static void getarg_c(void);    static void getarg_uc(void);
 static void getarg_s(void);    static void getarg_us(void);
@@ -282,6 +288,15 @@ static void getarg_i(void);
 static void getarg_ui(void);   static void getarg_l(void);
 #endif
 static void getarg(void);
 static void getarg_ui(void);   static void getarg_l(void);
 #endif
 static void getarg(void);
+static void putargr_c(void);   static void putargi_c(void);
+static void putargr_uc(void);  static void putargi_uc(void);
+static void putargr_s(void);   static void putargi_s(void);
+static void putargr_us(void);  static void putargi_us(void);
+static void putargr_i(void);   static void putargi_i(void);
+#if __WORDSIZE == 64
+static void putargr_ui(void);  static void putargi_ui(void);
+static void putargr_l(void);   static void putargi_l(void);
+#endif
 static void putargr(void);     static void putargi(void);
 static void addr(void);                static void addi(void);
 static void addxr(void);       static void addxi(void);
 static void putargr(void);     static void putargi(void);
 static void addr(void);                static void addi(void);
 static void addxr(void);       static void addxi(void);
@@ -306,6 +321,8 @@ static void lshr(void);             static void lshi(void);
 static void rshr(void);                static void rshi(void);
 static void rshr_u(void);      static void rshi_u(void);
 static void negr(void);                static void comr(void);
 static void rshr(void);                static void rshi(void);
 static void rshr_u(void);      static void rshi_u(void);
 static void negr(void);                static void comr(void);
+static void clor(void);                static void clzr(void);
+static void ctor(void);                static void ctzr(void);
 static void ltr(void);         static void lti(void);
 static void ltr_u(void);       static void lti_u(void);
 static void ler(void);         static void lei(void);
 static void ltr(void);         static void lti(void);
 static void ltr_u(void);       static void lti_u(void);
 static void ler(void);         static void lei(void);
@@ -392,9 +409,30 @@ static void bxsubr_u(void);        static void bxsubi_u(void);
 static void jmpr(void);                static void jmpi(void);
 static void callr(void);       static void calli(void);
 static void prepare(void);
 static void jmpr(void);                static void jmpi(void);
 static void callr(void);       static void calli(void);
 static void prepare(void);
+
+static void pushargr_c(void);  static void pushargi_c(void);
+static void pushargr_uc(void); static void pushargi_uc(void);
+static void pushargr_s(void);  static void pushargi_s(void);
+static void pushargr_us(void); static void pushargi_us(void);
+static void pushargr_i(void);  static void pushargi_i(void);
+#if __WORDSIZE == 64
+static void pushargr_ui(void); static void pushargi_ui(void);
+static void pushargr_l(void);  static void pushargi_l(void);
+#endif
 static void pushargr(void);    static void pushargi(void);
 static void pushargr(void);    static void pushargi(void);
+
 static void finishr(void);     static void finishi(void);
 static void ret(void);
 static void finishr(void);     static void finishi(void);
 static void ret(void);
+
+static void retr_c(void);      static void reti_c(void);
+static void retr_uc(void);     static void reti_uc(void);
+static void retr_s(void);      static void reti_s(void);
+static void retr_us(void);     static void reti_us(void);
+static void retr_i(void);      static void reti_i(void);
+#if __WORDSIZE == 64
+static void retr_ui(void);     static void reti_ui(void);
+static void retr_l(void);      static void reti_l(void);
+#endif
 static void retr(void);                static void reti(void);
 static void retval_c(void);    static void retval_uc(void);
 static void retval_s(void);    static void retval_us(void);
 static void retr(void);                static void reti(void);
 static void retval_c(void);    static void retval_uc(void);
 static void retval_s(void);    static void retval_us(void);
@@ -591,10 +629,16 @@ static instr_t              instr_vector[] = {
 #define entry2(name, function) { NULL, name, function }
     entry(live),
     entry(align),      entry(name),
 #define entry2(name, function) { NULL, name, function }
     entry(live),
     entry(align),      entry(name),
+    entry(skip),
     entry(prolog),
     entry(frame),      entry(tramp),
     entry(ellipsis),
     entry(allocai),    entry(allocar),
     entry(prolog),
     entry(frame),      entry(tramp),
     entry(ellipsis),
     entry(allocai),    entry(allocar),
+    entry(arg_c),      entry(arg_s),
+    entry(arg_i),
+#if __WORDSIZE == 64
+    entry(arg_l),
+#endif
     entry(arg),
     entry(getarg_c),   entry(getarg_uc),
     entry(getarg_s),   entry(getarg_us),
     entry(arg),
     entry(getarg_c),   entry(getarg_uc),
     entry(getarg_s),   entry(getarg_us),
@@ -603,6 +647,16 @@ static instr_t               instr_vector[] = {
     entry(getarg_ui),  entry(getarg_l),
 #endif
     entry(getarg),
     entry(getarg_ui),  entry(getarg_l),
 #endif
     entry(getarg),
+
+    entry(putargr_c),  entry(putargi_c),
+    entry(putargr_uc), entry(putargi_uc),
+    entry(putargr_s),  entry(putargi_s),
+    entry(putargr_us), entry(putargi_us),
+    entry(putargr_i),  entry(putargi_i),
+#if __WORDSIZE == 64
+    entry(putargr_ui), entry(putargi_ui),
+    entry(putargr_l),  entry(putargi_l),
+#endif
     entry(putargr),    entry(putargi),
     entry(addr),       entry(addi),
     entry(addxr),      entry(addxi),
     entry(putargr),    entry(putargi),
     entry(addr),       entry(addi),
     entry(addxr),      entry(addxi),
@@ -627,6 +681,8 @@ static instr_t                instr_vector[] = {
     entry(rshr),       entry(rshi),
     entry(rshr_u),     entry(rshi_u),
     entry(negr),       entry(comr),
     entry(rshr),       entry(rshi),
     entry(rshr_u),     entry(rshi_u),
     entry(negr),       entry(comr),
+    entry(clor),       entry(clzr),
+    entry(ctor),       entry(ctzr),
     entry(ltr),                entry(lti),
     entry(ltr_u),      entry(lti_u),
     entry(ler),                entry(lei),
     entry(ltr),                entry(lti),
     entry(ltr_u),      entry(lti_u),
     entry(ler),                entry(lei),
@@ -713,9 +769,27 @@ static instr_t               instr_vector[] = {
     entry(jmpr),       entry(jmpi),
     entry(callr),      entry(calli),
     entry(prepare),
     entry(jmpr),       entry(jmpi),
     entry(callr),      entry(calli),
     entry(prepare),
+    entry(pushargr_c), entry(pushargi_c),
+    entry(pushargr_uc),        entry(pushargi_uc),
+    entry(pushargr_s), entry(pushargi_s),
+    entry(pushargr_us),        entry(pushargi_us),
+    entry(pushargr_i), entry(pushargi_i),
+#if __WORDSIZE == 64
+    entry(pushargr_ui),        entry(pushargi_ui),
+    entry(pushargr_l), entry(pushargi_l),
+#endif
     entry(pushargr),   entry(pushargi),
     entry(finishr),    entry(finishi),
     entry(ret),
     entry(pushargr),   entry(pushargi),
     entry(finishr),    entry(finishi),
     entry(ret),
+    entry(retr_c),     entry(reti_c),
+    entry(retr_uc),    entry(reti_uc),
+    entry(retr_s),     entry(reti_s),
+    entry(retr_us),    entry(reti_us),
+    entry(retr_i),     entry(reti_i),
+#if __WORDSIZE == 64
+    entry(retr_ui),    entry(reti_ui),
+    entry(retr_l),     entry(reti_l),
+#endif
     entry(retr),       entry(reti),
     entry(retval_c),   entry(retval_uc),
     entry(retval_s),   entry(retval_us),
     entry(retr),       entry(reti),
     entry(retval_c),   entry(retval_uc),
     entry(retval_s),   entry(retval_us),
@@ -1400,6 +1474,7 @@ live(void) {
     jit_live(parser.regval);
 }
 entry_im(align)
     jit_live(parser.regval);
 }
 entry_im(align)
+entry_im(skip)
 entry(prolog)
 entry_im(frame)                        entry_im(tramp)
 entry(ellipsis)
 entry(prolog)
 entry_im(frame)                        entry_im(tramp)
 entry(ellipsis)
@@ -1413,6 +1488,11 @@ allocai(void) {
     symbol->value.i = i;
 }
 entry_ir_ir(allocar)
     symbol->value.i = i;
 }
 entry_ir_ir(allocar)
+entry_ca(arg_c)                        entry_ca(arg_s)
+entry_ca(arg_i)
+#if __WORDSIZE == 64
+entry_ca(arg_l)
+#endif
 entry_ca(arg)
 entry_ia(getarg_c)             entry_ia(getarg_uc)
 entry_ia(getarg_s)             entry_ia(getarg_us)
 entry_ca(arg)
 entry_ia(getarg_c)             entry_ia(getarg_uc)
 entry_ia(getarg_s)             entry_ia(getarg_us)
@@ -1421,6 +1501,15 @@ entry_ia(getarg_i)
 entry_ia(getarg_ui)            entry_ia(getarg_l)
 #endif
 entry_ia(getarg)
 entry_ia(getarg_ui)            entry_ia(getarg_l)
 #endif
 entry_ia(getarg)
+entry_ia(putargr_c)            entry_ima(putargi_c)
+entry_ia(putargr_uc)           entry_ima(putargi_uc)
+entry_ia(putargr_s)            entry_ima(putargi_s)
+entry_ia(putargr_us)           entry_ima(putargi_us)
+entry_ia(putargr_i)            entry_ima(putargi_i)
+#if __WORDSIZE == 64
+entry_ia(putargr_ui)           entry_ima(putargi_ui)
+entry_ia(putargr_l)            entry_ima(putargi_l)
+#endif
 entry_ia(putargr)              entry_ima(putargi)
 entry_ir_ir_ir(addr)           entry_ir_ir_im(addi)
 entry_ir_ir_ir(addxr)          entry_ir_ir_im(addxi)
 entry_ia(putargr)              entry_ima(putargi)
 entry_ir_ir_ir(addr)           entry_ir_ir_im(addi)
 entry_ir_ir_ir(addxr)          entry_ir_ir_im(addxi)
@@ -1445,6 +1534,8 @@ entry_ir_ir_ir(lshr)              entry_ir_ir_im(lshi)
 entry_ir_ir_ir(rshr)           entry_ir_ir_im(rshi)
 entry_ir_ir_ir(rshr_u)         entry_ir_ir_im(rshi_u)
 entry_ir_ir(negr)              entry_ir_ir(comr)
 entry_ir_ir_ir(rshr)           entry_ir_ir_im(rshi)
 entry_ir_ir_ir(rshr_u)         entry_ir_ir_im(rshi_u)
 entry_ir_ir(negr)              entry_ir_ir(comr)
+entry_ir_ir(clor)              entry_ir_ir(clzr)
+entry_ir_ir(ctor)              entry_ir_ir(ctzr)
 entry_ir_ir_ir(ltr)            entry_ir_ir_im(lti)
 entry_ir_ir_ir(ltr_u)          entry_ir_ir_im(lti_u)
 entry_ir_ir_ir(ler)            entry_ir_ir_im(lei)
 entry_ir_ir_ir(ltr)            entry_ir_ir_im(lti)
 entry_ir_ir_ir(ltr_u)          entry_ir_ir_im(lti_u)
 entry_ir_ir_ir(ler)            entry_ir_ir_im(lei)
@@ -1578,9 +1669,27 @@ entry_lb_ir_ir(bxsubr_u) entry_lb_ir_im(bxsubi_u)
 entry_ir(jmpr)                 entry_lb(jmpi)
 entry_ir(callr)                        entry_fn(calli)
 entry(prepare)
 entry_ir(jmpr)                 entry_lb(jmpi)
 entry_ir(callr)                        entry_fn(calli)
 entry(prepare)
+entry_ir(pushargr_c)           entry_im(pushargi_c)
+entry_ir(pushargr_uc)          entry_im(pushargi_uc)
+entry_ir(pushargr_s)           entry_im(pushargi_s)
+entry_ir(pushargr_us)          entry_im(pushargi_us)
+entry_ir(pushargr_i)           entry_im(pushargi_i)
+#if __WORDSIZE == 64
+entry_ir(pushargr_ui)          entry_im(pushargi_ui)
+entry_ir(pushargr_l)           entry_im(pushargi_l)
+#endif
 entry_ir(pushargr)             entry_im(pushargi)
 entry_ir(finishr)              entry_fn(finishi)
 entry(ret)
 entry_ir(pushargr)             entry_im(pushargi)
 entry_ir(finishr)              entry_fn(finishi)
 entry(ret)
+entry_ir(retr_c)               entry_im(reti_c)
+entry_ir(retr_uc)              entry_im(reti_uc)
+entry_ir(retr_s)               entry_im(reti_s)
+entry_ir(retr_us)              entry_im(reti_us)
+entry_ir(retr_i)               entry_im(reti_i)
+#if __WORDSIZE == 64
+entry_ir(retr_ui)              entry_im(reti_ui)
+entry_ir(retr_l)               entry_im(reti_l)
+#endif
 entry_ir(retr)                 entry_im(reti)
 entry_ir(retval_c)             entry_ir(retval_uc)
 entry_ir(retval_s)             entry_ir(retval_us)
 entry_ir(retr)                 entry_im(reti)
 entry_ir(retval_c)             entry_ir(retval_uc)
 entry_ir(retval_s)             entry_ir(retval_us)
@@ -4257,6 +4366,9 @@ main(int argc, char *argv[])
     opt_short += snprintf(cmdline + opt_short,
                          sizeof(cmdline) - opt_short,
                          " -D__mips__=1");
     opt_short += snprintf(cmdline + opt_short,
                          sizeof(cmdline) - opt_short,
                          " -D__mips__=1");
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__mips_isa_rev=%d", jit_cpu.release);
 #endif
 #if defined(__arm__)
     opt_short += snprintf(cmdline + opt_short,
 #endif
 #if defined(__arm__)
     opt_short += snprintf(cmdline + opt_short,
diff --git a/deps/lightning/check/protect.c b/deps/lightning/check/protect.c
new file mode 100644 (file)
index 0000000..f5ec740
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Simple test of (un)protecting a code buffer.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+
+#define MARKER 10 /* Byte that main() stores into the emitted code and that the emitted code compares against. */
+
+int
+main(int argc, char *argv[])   /* NOTE(review): argc is not referenced in this function. */
+{
+    jit_state_t                 *_jit;
+    jit_node_t           *load, *label, *ok;
+    unsigned char        *ptr;
+    void               (*function)(void);
+    int                          mmap_prot, mmap_flags;        /* NOTE(review): neither variable is referenced below. */
+    /* Test outline: emit code that reads one byte located inside its own buffer, patch that byte after emission, then run it. */
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    jit_prolog();
+    /* R0 starts as 0; jit_patch_at(load, label) below presumably rewrites this movi to hold label's address (confirm against the lightning manual). */
+    load = jit_movi(JIT_R0, 0);
+    jit_ldr_c(JIT_R0, JIT_R0);        /* R0 = char loaded from the address held in R0 */
+    ok = jit_forward();
+    jit_patch_at(jit_beqi(JIT_R0, MARKER), ok);        /* loaded byte == MARKER: branch to ok */
+    jit_prepare();        /* otherwise the emitted code calls exit(1) */
+    jit_pushargi(1);
+    jit_finishi(exit);
+    label = jit_indirect();        /* label for the reserved byte that follows */
+    jit_skip(1);             /* Reserves enough space for a byte.  */
+    jit_patch_at(load, label);
+    jit_link(ok);        /* success path: printf("%s\n", "ok") */
+    jit_prepare();
+    jit_pushargi((jit_word_t)"%s\n");
+    jit_ellipsis();
+    jit_pushargi((jit_word_t)"ok");
+    jit_finishi(printf);
+
+    function = jit_emit();
+    if (function == NULL)
+       abort();
+    /* Store MARKER at label's address between the unprotect/protect calls; per the file header this exercises (un)protecting the code buffer. */
+    jit_unprotect ();
+    ptr = jit_address (label);
+    *ptr = MARKER;
+    jit_protect ();
+
+    jit_clear_state();
+    /* Run the emitted code: it prints ok when the load sees MARKER, and calls exit(1) otherwise. */
+    (*function)();
+
+    jit_destroy_state();
+    finish_jit();
+
+    return (0);
+}
index a7e39e1..65f1580 100644 (file)
@@ -9,49 +9,49 @@ ok:
 putr:
        prolog
        frame 160
 putr:
        prolog
        frame 160
-       arg $ac
-       arg $auc
-       arg $as
-       arg $aus
-       arg $ai
+       arg_c $ac
+       arg_c $auc
+       arg_s $as
+       arg_s $aus
+       arg_i $ai
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       arg $aui
-       arg $al
+       arg_i $aui
+       arg_l $al
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
-       arg $_l
-       arg $_ui
+       arg_l $_l
+       arg_i $_ui
 #endif
 #endif
-       arg $_i
-       arg $_us
-       arg $_s
-       arg $_uc
-       arg $_c
+       arg_i $_i
+       arg_s $_us
+       arg_s $_s
+       arg_c $_uc
+       arg_c $_c
        getarg_c %r0 $ac
        negr %r0 %r0
        getarg_c %r0 $ac
        negr %r0 %r0
-       putargr %r0 $ac
+       putargr_c %r0 $ac
        getarg_uc %r0 $auc
        negr %r0 %r0
        getarg_uc %r0 $auc
        negr %r0 %r0
-       putargr %r0 $auc
+       putargr_uc %r0 $auc
        getarg_s %r0 $as
        negr %r0 %r0
        getarg_s %r0 $as
        negr %r0 %r0
-       putargr %r0 $as
+       putargr_s %r0 $as
        getarg_us %r0 $aus
        negr %r0 %r0
        getarg_us %r0 $aus
        negr %r0 %r0
-       putargr %r0 $aus
+       putargr_us %r0 $aus
        getarg_i %r0 $ai
        negr %r0 %r0
        getarg_i %r0 $ai
        negr %r0 %r0
-       putargr %r0 $ai
+       putargr_i %r0 $ai
 #if __WORDSIZE == 64
        getarg_ui %r0 $aui
        negr %r0 %r0
 #if __WORDSIZE == 64
        getarg_ui %r0 $aui
        negr %r0 %r0
-       putargr %r0 $aui
+       putargr_ui %r0 $aui
        getarg_l %r0 $al
        negr %r0 %r0
        getarg_l %r0 $al
        negr %r0 %r0
-       putargr %r0 $al
+       putargr_l %r0 $al
 #endif
        getarg_f %f0 $af
        negr_f %f0 %f0
 #endif
        getarg_f %f0 $af
        negr_f %f0 %f0
@@ -65,49 +65,49 @@ putr:
 #if __WORDSIZE == 64
        getarg_l %r0 $_l
        negr %r0 %r0
 #if __WORDSIZE == 64
        getarg_l %r0 $_l
        negr %r0 %r0
-       putargr %r0 $_l
+       putargr_l %r0 $_l
        getarg_ui %r0 $_ui
        negr %r0 %r0
        getarg_ui %r0 $_ui
        negr %r0 %r0
-       putargr %r0 $_ui
+       putargr_ui %r0 $_ui
 #endif
        getarg_i %r0 $_i
        negr %r0 %r0
 #endif
        getarg_i %r0 $_i
        negr %r0 %r0
-       putargr %r0 $_i
+       putargr_i %r0 $_i
        getarg_us %r0 $_us
        negr %r0 %r0
        getarg_us %r0 $_us
        negr %r0 %r0
-       putargr %r0 $_us
+       putargr_us %r0 $_us
        getarg_s %r0 $_s
        negr %r0 %r0
        getarg_s %r0 $_s
        negr %r0 %r0
-       putargr %r0 $_s
+       putargr_s %r0 $_s
        getarg_uc %r0 $_uc
        negr %r0 %r0
        getarg_uc %r0 $_uc
        negr %r0 %r0
-       putargr %r0 $_uc
+       putargr_uc %r0 $_uc
        getarg_c %r0 $_c
        negr %r0 %r0
        getarg_c %r0 $_c
        negr %r0 %r0
-       putargr %r0 $_c
+       putargr_c %r0 $_c
        jmpi _putr
 rputr:
        jmpi _putr
 rputr:
-       putargi 17 $ac
-       putargi 16 $auc
-       putargi 15 $as
-       putargi 14 $aus
-       putargi 13 $ai
+       putargi_c 17 $ac
+       putargi_uc 16 $auc
+       putargi_s 15 $as
+       putargi_us 14 $aus
+       putargi_i 13 $ai
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       putargi 12 $aui
-       putargi 11 $al
+       putargi_ui 12 $aui
+       putargi_l 11 $al
 #endif
        putargi_f 10 $af
        putargi_d 9 $ad
        putargi 8 $a
 #if __WORDSIZE == 64
 #endif
        putargi_f 10 $af
        putargi_d 9 $ad
        putargi 8 $a
 #if __WORDSIZE == 64
-       putargi 7 $_l
-       putargi 6 $_ui
+       putargi_l 7 $_l
+       putargi_ui 6 $_ui
 #endif
 #endif
-       putargi 5 $_i
-       putargi 4 $_us
-       putargi 3 $_s
-       putargi 2 $_uc
-       putargi 1 $_c
+       putargi_i 5 $_i
+       putargi_us 4 $_us
+       putargi_s 3 $_s
+       putargi_uc 2 $_uc
+       putargi_c 1 $_c
        jmpi _puti
 rputi:
        ret
        jmpi _puti
 rputi:
        ret
@@ -117,27 +117,27 @@ rputi:
 _putr:
        prolog
        tramp 160
 _putr:
        prolog
        tramp 160
-       arg $ac
-       arg $auc
-       arg $as
-       arg $aus
-       arg $ai
+       arg_c $ac
+       arg_c $auc
+       arg_s $as
+       arg_s $aus
+       arg_i $ai
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       arg $aui
-       arg $al
+       arg_i $aui
+       arg_l $al
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
-       arg $_l
-       arg $_ui
+       arg_l $_l
+       arg_i $_ui
 #endif
 #endif
-       arg $_i
-       arg $_us
-       arg $_s
-       arg $_uc
-       arg $_c
+       arg_i $_i
+       arg_s $_us
+       arg_s $_s
+       arg_c $_uc
+       arg_c $_c
        getarg_c %r0 $ac
        beqi rac %r0 -1
        calli @abort
        getarg_c %r0 $ac
        beqi rac %r0 -1
        calli @abort
@@ -181,7 +181,7 @@ rad:
        calli @abort
 ra:
 #if __WORDSIZE == 64
        calli @abort
 ra:
 #if __WORDSIZE == 64
-       getarg %r0 $_l
+       getarg_l %r0 $_l
        beqi r_l %r0 -11
        calli @abort
 r_l:
        beqi r_l %r0 -11
        calli @abort
 r_l:
@@ -217,27 +217,27 @@ r_c:
 _puti:
        prolog
        tramp 160
 _puti:
        prolog
        tramp 160
-       arg $ac
-       arg $auc
-       arg $as
-       arg $aus
-       arg $ai
+       arg_c $ac
+       arg_c $auc
+       arg_s $as
+       arg_s $aus
+       arg_i $ai
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       arg $aui
-       arg $al
+       arg_i $aui
+       arg_l $al
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
 #endif
        arg_f $af
        arg_d $ad
        arg $a
 #if __WORDSIZE == 64
-       arg $_l
-       arg $_ui
+       arg_l $_l
+       arg_i $_ui
 #endif
 #endif
-       arg $_i
-       arg $_us
-       arg $_s
-       arg $_uc
-       arg $_c
+       arg_i $_i
+       arg_s $_us
+       arg_s $_s
+       arg_c $_uc
+       arg_c $_c
        getarg_c %r0 $ac
        beqi iac %r0 17
        calli @abort
        getarg_c %r0 $ac
        beqi iac %r0 17
        calli @abort
@@ -281,7 +281,7 @@ iad:
        calli @abort
 ia:
 #if __WORDSIZE == 64
        calli @abort
 ia:
 #if __WORDSIZE == 64
-       getarg %r0 $_l
+       getarg_l %r0 $_l
        beqi i_l %r0 7
        calli @abort
 i_l:
        beqi i_l %r0 7
        calli @abort
 i_l:
@@ -390,27 +390,27 @@ fd2:
 main:
        prolog
        prepare
 main:
        prolog
        prepare
-               pushargi 1
-               pushargi 2
-               pushargi 3
-               pushargi 4
-               pushargi 5
+               pushargi_c  1
+               pushargi_uc 2
+               pushargi_s  3
+               pushargi_us 4
+               pushargi_i  5
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-               pushargi 6
-               pushargi 7
+               pushargi_ui 6
+               pushargi_l  7
 #endif
 #endif
-               pushargi_f 8
-               pushargi_d 9
-               pushargi 10
+               pushargi_f  8
+               pushargi_d  9
+               pushargi    10
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-               pushargi 11
-               pushargi 12
+               pushargi_l  11
+               pushargi_ui 12
 #endif
 #endif
-               pushargi 13
-               pushargi 14
-               pushargi 15
-               pushargi 16
-               pushargi 17
+               pushargi_i  13
+               pushargi_us 14
+               pushargi_s  15
+               pushargi_uc 16
+               pushargi_c  17
        finishi putr
        prepare
                pushargi 1
        finishi putr
        prepare
                pushargi 1
diff --git a/deps/lightning/check/riprel.c b/deps/lightning/check/riprel.c
new file mode 100644 (file)
index 0000000..c776e2a
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ * Simple test for x86_64 rip relative access that can also be useful
+ * on other ports when data is close to instruction pointer.
+ */
+
+#include <lightning.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/mman.h>
+#if defined(__sgi)
+#  include <fcntl.h>
+#endif
+#include <unistd.h>
+
+#ifndef MAP_ANON
+#  define MAP_ANON                     MAP_ANONYMOUS
+#  ifndef MAP_ANONYMOUS
+#    define MAP_ANONYMOUS              0
+#  endif
+#endif
+
+#if !defined(__sgi)
+#define  mmap_fd                       -1
+#endif
+
+int
+main(int argc, char *argv[])   /* NOTE(review): argc is not used */
+{   /* JIT test: code emitted into the page(s) after `ptr` stores to and loads from absolute addresses in the first page, then prints the values. */
+    jit_uint8_t                 *ptr;
+    jit_state_t                 *_jit;
+    jit_word_t           length;   /* NOTE(review): never referenced in this function */
+#if defined(__sgi)
+    int                          mmap_fd;
+#endif
+    void               (*function)(void);
+    int                          mmap_prot, mmap_flags, result, pagesize;
+    int                          mult;   /* number of pages mapped: first page holds data, remaining (mult - 1) hold code */
+
+#if defined(__ia64__)
+    mult = 8;
+#else
+    mult = 2;
+#endif
+    pagesize = sysconf(_SC_PAGESIZE);
+    if (pagesize < 4096)   /* use at least 4096 bytes per "page" */
+       pagesize = 4096;
+
+#if defined(__sgi)
+    mmap_fd = open("/dev/zero", O_RDWR);
+#endif
+
+    mmap_prot = PROT_READ | PROT_WRITE;
+#if !(__OpenBSD__ || __APPLE__)
+    mmap_prot |= PROT_EXEC;   /* on OpenBSD/Apple the mapping starts without PROT_EXEC; see the mprotect calls below */
+#endif
+#if __NetBSD__
+    mmap_prot = PROT_MPROTECT(mmap_prot);
+    mmap_flags = 0;
+#else
+    mmap_flags = MAP_PRIVATE;
+#endif
+    mmap_flags |= MAP_ANON;
+    ptr = mmap(NULL, pagesize * mult,  mmap_prot, mmap_flags, mmap_fd, 0);   /* mmap_fd is the macro -1 unless __sgi */
+    assert(ptr != MAP_FAILED);
+#if defined(__sgi)
+    close(mmap_fd);
+#endif
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+
+    jit_prolog();
+    jit_movi(JIT_R0, 'c');
+    jit_sti_c(ptr + 0, JIT_R0);   /* char 'c' at offset 0 */
+    jit_movi(JIT_R0, 'C');
+    jit_sti_c(ptr + 1, JIT_R0);   /* char 'C' at offset 1 */
+
+    jit_movi(JIT_R0, 's');
+    jit_sti_s(ptr + 2, JIT_R0);   /* short 's' at offset 2 */
+    jit_movi(JIT_R0, 'S');
+    jit_sti_s(ptr + 4, JIT_R0);   /* short 'S' at offset 4 */
+
+    jit_movi(JIT_R0, 'i');
+    jit_sti_i(ptr + 8, JIT_R0);   /* int 'i' at offset 8 */
+#if __WORDSIZE == 64
+    jit_movi(JIT_R0, 'I');
+    jit_sti_i(ptr + 12, JIT_R0);   /* int 'I' at offset 12 (64-bit only) */
+
+    jit_movi(JIT_R0, 'l');
+    jit_sti_l(ptr + 16, JIT_R0);   /* long 'l' at offset 16 (64-bit only) */
+#endif
+    jit_movi_f(JIT_F0, 1.0);
+    jit_sti_f(ptr + 24, JIT_F0);   /* float 1.0 at offset 24 */
+    jit_movi_d(JIT_F0, 2.0);
+    jit_sti_d(ptr + 32, JIT_F0);   /* double 2.0 at offset 32 */
+
+    jit_ldi_c(JIT_R0, ptr + 0);   /* read back the signed-load variants: c, s, i (and l on 64-bit) */
+    jit_ldi_s(JIT_R1, ptr + 2);
+    jit_ldi_i(JIT_R2, ptr + 8);
+#if __WORDSIZE == 64
+    jit_ldi_l(JIT_V0, ptr + 16);
+#endif
+    jit_prepare();
+#if __WORDSIZE == 64
+    jit_pushargi((jit_word_t)"%c %c %c %c\n");
+#else
+    jit_pushargi((jit_word_t)"%c %c %c l\n");   /* 32-bit: literal "l" keeps the output equal to riprel.ok */
+#endif
+    jit_ellipsis();
+    jit_pushargr(JIT_R0);
+    jit_pushargr(JIT_R1);
+    jit_pushargr(JIT_R2);
+#if __WORDSIZE == 64
+    jit_pushargr(JIT_V0);
+#endif
+    jit_finishi(printf);
+
+    jit_ldi_uc(JIT_R0, ptr + 1);   /* read back the unsigned-load variants: C, S (and I on 64-bit) */
+    jit_ldi_us(JIT_R1, ptr + 4);
+#if __WORDSIZE == 64
+    jit_ldi_ui(JIT_R2, ptr + 12);
+#endif
+    jit_prepare();
+#if __WORDSIZE == 64
+    jit_pushargi((jit_word_t)"%c %c %c\n");
+#else
+    jit_pushargi((jit_word_t)"%c %c I\n");   /* 32-bit: literal "I" keeps the output equal to riprel.ok */
+#endif
+    jit_ellipsis();
+    jit_pushargr(JIT_R0);
+    jit_pushargr(JIT_R1);
+#if __WORDSIZE == 64
+    jit_pushargr(JIT_R2);
+#endif
+    jit_finishi(printf);
+
+    jit_ldi_f(JIT_F0, ptr + 24);
+    jit_extr_f_d(JIT_F0, JIT_F0);   /* convert the float to double; both values are pushed as doubles below */
+    jit_ldi_d(JIT_F1, ptr + 32);
+
+    jit_prepare();
+    jit_pushargi((jit_word_t)"%.1f %.1f\n");
+    jit_ellipsis();
+    jit_pushargr_d(JIT_F0);
+    jit_pushargr_d(JIT_F1);
+    jit_finishi(printf);
+
+    jit_realize();
+
+    jit_set_code(ptr + pagesize, pagesize * (mult - 1));   /* code goes right after the data page, close to the addresses it accesses */
+
+ #if __NetBSD__ || __OpenBSD__ || __APPLE__
+    result = mprotect(ptr, pagesize, PROT_READ | PROT_WRITE);   /* first (data) page: read/write */
+    assert(result == 0);
+#endif
+    function = jit_emit();
+    if (function == NULL)
+       abort();
+
+    //jit_disassemble();
+    jit_clear_state();
+#if __NetBSD__ ||  __OpenBSD__ || __APPLE__
+    result = mprotect(ptr + pagesize, pagesize, PROT_READ | PROT_EXEC);   /* NOTE(review): covers only one page of the code area */
+    assert(result == 0);
+#endif
+    (*function)();   /* run the generated code; expected output is in riprel.ok */
+    jit_destroy_state();
+    finish_jit();
+
+    munmap(ptr, pagesize * mult);
+
+    return (0);
+}
diff --git a/deps/lightning/check/riprel.ok b/deps/lightning/check/riprel.ok
new file mode 100644 (file)
index 0000000..4b90837
--- /dev/null
@@ -0,0 +1,3 @@
+c s i l
+C S I
+1.0 2.0
index 62719ee..08611d9 100644 (file)
@@ -31,14 +31,14 @@ main(int argc, char *argv[])
     int                          mmap_fd;
 #endif
     void               (*function)(void);
     int                          mmap_fd;
 #endif
     void               (*function)(void);
-    int                          mmap_prot, mmap_flags;
+    int                          mmap_prot, mmap_flags, result;
 
 #if defined(__sgi)
     mmap_fd = open("/dev/zero", O_RDWR);
 #endif
 
     mmap_prot = PROT_READ | PROT_WRITE;
 
 #if defined(__sgi)
     mmap_fd = open("/dev/zero", O_RDWR);
 #endif
 
     mmap_prot = PROT_READ | PROT_WRITE;
-#if !__OpenBSD__
+#if !(__OpenBSD__ || __APPLE__)
     mmap_prot |= PROT_EXEC;
 #endif
 #if __NetBSD__
     mmap_prot |= PROT_EXEC;
 #endif
 #if __NetBSD__
@@ -83,7 +83,8 @@ main(int argc, char *argv[])
        abort();
 
 #if __NetBSD__
        abort();
 
 #if __NetBSD__
-    assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE) == 0);
+    result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_WRITE);
+    assert(result == 0);
 #endif
     /* and calling again with enough space works */
     jit_set_code(ptr, 1024 * 1024);
 #endif
     /* and calling again with enough space works */
     jit_set_code(ptr, 1024 * 1024);
@@ -92,8 +93,9 @@ main(int argc, char *argv[])
        abort();
 
     jit_clear_state();
        abort();
 
     jit_clear_state();
-#if __NetBSD__ ||  __OpenBSD__
-    assert(mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC) == 0);
+#if __NetBSD__ ||  __OpenBSD__ || __APPLE__
+    result = mprotect(ptr, 1024 * 1024, PROT_READ | PROT_EXEC);
+    assert(result == 0);
 #endif
     (*function)();
     jit_destroy_state();
 #endif
     (*function)();
     jit_destroy_state();
diff --git a/deps/lightning/check/skip.ok b/deps/lightning/check/skip.ok
new file mode 100644 (file)
index 0000000..f599e28
--- /dev/null
@@ -0,0 +1 @@
+10
diff --git a/deps/lightning/check/skip.tst b/deps/lightning/check/skip.tst
new file mode 100644 (file)
index 0000000..94eec76
--- /dev/null
@@ -0,0 +1,13 @@
+.data  32
+fmt:
+.c     "%d\n"
+.code
+       prolog
+        skip    4
+        prepare
+                pushargi fmt
+                ellipsis
+                pushargi 10
+        finishi @printf
+        ret
+        epilog
index e699719..1ebe4f5 100644 (file)
@@ -55,7 +55,7 @@ fill##T##done:                                                        \
 #define fill_us                fill_s
 #define fill_ui                fill_i
 
 #define fill_us                fill_s
 #define fill_ui                fill_i
 
-#define ARG(  T, N)                    arg    $arg##T##N
+#define ARG(  T, N)                    arg##T $arg##T##N
 #define ARGF( T, N)                    arg##T $arg##T##N
 #define ARG1( K, T)                    ARG##K(T, 0)
 #define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
 #define ARGF( T, N)                    arg##T $arg##T##N
 #define ARG1( K, T)                    ARG##K(T, 0)
 #define ARG2( K, T)    ARG1( K, T)     ARG##K(T, 1)
@@ -74,56 +74,56 @@ fill##T##done:                                                      \
 #define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
 #define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
 #define ARG_c(N)                       ARG##N( , _c)
 #define ARG15(K, T)    ARG14(K, T)     ARG##K(T, 14)
 #define ARG16(K, T)    ARG15(K, T)     ARG##K(T, 15)
 #define ARG_c(N)                       ARG##N( , _c)
-#define ARG_uc(N)                      ARG##N( , _uc)
+#define ARG_uc(N)                      ARG##N( , _c)
 #define ARG_s(N)                       ARG##N( , _s)
 #define ARG_s(N)                       ARG##N( , _s)
-#define ARG_us(N)                      ARG##N( , _us)
+#define ARG_us(N)                      ARG##N( , _s)
 #define ARG_i(N)                       ARG##N( , _i)
 #define ARG_i(N)                       ARG##N( , _i)
-#define ARG_ui(N)                      ARG##N( , _ui)
+#define ARG_ui(N)                      ARG##N( , _i)
 #define ARG_l(N)                       ARG##N( , _l)
 #define ARG_f(N)                       ARG##N(F, _f)
 #define ARG_d(N)                       ARG##N(F, _d)
 
 #define ARG_l(N)                       ARG##N( , _l)
 #define ARG_f(N)                       ARG##N(F, _f)
 #define ARG_d(N)                       ARG##N(F, _d)
 
-#define CHK(N, T, V)                                           \
-       getarg %r0 $arg##T##V                                   \
+#define CHK(N, T, TT, V)                                       \
+       getarg##T %r0 $arg##TT##V                               \
        ldxi##T %r1 %v0 $(V * szof##T)                          \
        beqr N##T##V %r0 %r1                                    \
        calli @abort                                            \
 N##T##V:
        ldxi##T %r1 %v0 $(V * szof##T)                          \
        beqr N##T##V %r0 %r1                                    \
        calli @abort                                            \
 N##T##V:
-#define CHKF(N, T, V)                                          \
-       getarg##T %f0 $arg##T##V                                \
+#define CHKF(N, T, TT, V)                                      \
+       getarg##T %f0 $arg##TT##V                               \
        ldxi##T %f1 %v0 $(V * szof##T)                          \
        beqr##T N##T##V %f0 %f1                                 \
        calli @abort                                            \
 N##T##V:
 
        ldxi##T %f1 %v0 $(V * szof##T)                          \
        beqr##T N##T##V %f0 %f1                                 \
        calli @abort                                            \
 N##T##V:
 
-#define GET1( K, N, T, V)                              CHK##K(N, T, 0)
-#define GET2( K, N, T, V)      GET1( K, N, T, V)       CHK##K(N, T, 1)
-#define GET3( K, N, T, V)      GET2( K, N, T, V)       CHK##K(N, T, 2)
-#define GET4( K, N, T, V)      GET3( K, N, T, V)       CHK##K(N, T, 3)
-#define GET5( K, N, T, V)      GET4( K, N, T, V)       CHK##K(N, T, 4)
-#define GET6( K, N, T, V)      GET5( K, N, T, V)       CHK##K(N, T, 5)
-#define GET7( K, N, T, V)      GET6( K, N, T, V)       CHK##K(N, T, 6)
-#define GET8( K, N, T, V)      GET7( K, N, T, V)       CHK##K(N, T, 7)
-#define GET9( K, N, T, V)      GET8( K, N, T, V)       CHK##K(N, T, 8)
-#define GET10(K, N, T, V)      GET9( K, N, T, V)       CHK##K(N, T, 9)
-#define GET11(K, N, T, V)      GET10(K, N, T, V)       CHK##K(N, T, 10)
-#define GET12(K, N, T, V)      GET11(K, N, T, V)       CHK##K(N, T, 11)
-#define GET13(K, N, T, V)      GET12(K, N, T, V)       CHK##K(N, T, 12)
-#define GET14(K, N, T, V)      GET13(K, N, T, V)       CHK##K(N, T, 13)
-#define GET15(K, N, T, V)      GET14(K, N, T, V)       CHK##K(N, T, 14)
-#define GET16(K, N, T, V)      GET15(K, N, T, V)       CHK##K(N, T, 15)
+#define GET1( K, N, T, TT, V)                          CHK##K(N, T, TT, 0)
+#define GET2( K, N, T, TT, V)  GET1( K, N, T, TT, V)   CHK##K(N, T, TT, 1)
+#define GET3( K, N, T, TT, V)  GET2( K, N, T, TT, V)   CHK##K(N, T, TT, 2)
+#define GET4( K, N, T, TT, V)  GET3( K, N, T, TT, V)   CHK##K(N, T, TT, 3)
+#define GET5( K, N, T, TT, V)  GET4( K, N, T, TT, V)   CHK##K(N, T, TT, 4)
+#define GET6( K, N, T, TT, V)  GET5( K, N, T, TT, V)   CHK##K(N, T, TT, 5)
+#define GET7( K, N, T, TT, V)  GET6( K, N, T, TT, V)   CHK##K(N, T, TT, 6)
+#define GET8( K, N, T, TT, V)  GET7( K, N, T, TT, V)   CHK##K(N, T, TT, 7)
+#define GET9( K, N, T, TT, V)  GET8( K, N, T, TT, V)   CHK##K(N, T, TT, 8)
+#define GET10(K, N, T, TT, V)  GET9( K, N, T, TT, V)   CHK##K(N, T, TT, 9)
+#define GET11(K, N, T, TT, V)  GET10(K, N, T, TT, V)   CHK##K(N, T, TT, 10)
+#define GET12(K, N, T, TT, V)  GET11(K, N, T, TT, V)   CHK##K(N, T, TT, 11)
+#define GET13(K, N, T, TT, V)  GET12(K, N, T, TT, V)   CHK##K(N, T, TT, 12)
+#define GET14(K, N, T, TT, V)  GET13(K, N, T, TT, V)   CHK##K(N, T, TT, 13)
+#define GET15(K, N, T, TT, V)  GET14(K, N, T, TT, V)   CHK##K(N, T, TT, 14)
+#define GET16(K, N, T, TT, V)  GET15(K, N, T, TT, V)   CHK##K(N, T, TT, 15)
 
 
-#define GET_c(N, M)            GET##N( , c##N,  _c,  M)
-#define GET_uc(N, M)           GET##N( , uc##N, _uc, M)
-#define GET_s(N, M)            GET##N( , s##N,  _s,  M)
-#define GET_us(N, M)           GET##N( , us##N, _us, M)
-#define GET_i(N, M)            GET##N( , i##N,  _i,  M)
-#define GET_ui(N, M)           GET##N( , ui##N, _ui, M)
-#define GET_l(N, M)            GET##N( , l##N,  _l,  M)
-#define GET_f(N, M)            GET##N(F, f##N,  _f,  M)
-#define GET_d(N, M)            GET##N(F, d##N,  _d,  M)
+#define GET_c(N, M)            GET##N( , c##N,  _c,  _c, M)
+#define GET_uc(N, M)           GET##N( , uc##N, _uc, _c, M)
+#define GET_s(N, M)            GET##N( , s##N,  _s,  _s, M)
+#define GET_us(N, M)           GET##N( , us##N, _us, _s, M)
+#define GET_i(N, M)            GET##N( , i##N,  _i,  _i, M)
+#define GET_ui(N, M)           GET##N( , ui##N, _ui, _i, M)
+#define GET_l(N, M)            GET##N( , l##N,  _l,  _l, M)
+#define GET_f(N, M)            GET##N(F, f##N,  _f,  _f, M)
+#define GET_d(N, M)            GET##N(F, d##N,  _d,  _d, M)
 
 
-#define PUSH(  T, V)           pushargi    V
+#define PUSH(  T, V)           pushargi##T V
 #define PUSHF( T, V)           pushargi##T V
 #define PUSH0( K, T)           /**/
 #define PUSH1( K, T)                                   PUSH##K(T, 0)
 #define PUSHF( T, V)           pushargi##T V
 #define PUSH0( K, T)           /**/
 #define PUSH1( K, T)                                   PUSH##K(T, 0)
@@ -161,14 +161,14 @@ test##T##_0:                                                      \
        ret                                                     \
        epilog
 
        ret                                                     \
        epilog
 
-#define DEFN(N, M, T)                                          \
+#define DEFN(N, M, T, TT)                                      \
        name test##T##_##N                                      \
 test##T##_##N:                                                 \
        prolog                                                  \
        arg $argp                                               \
        /* stack buffer in %v0 */                               \
        getarg %v0 $argp                                        \
        name test##T##_##N                                      \
 test##T##_##N:                                                 \
        prolog                                                  \
        arg $argp                                               \
        /* stack buffer in %v0 */                               \
        getarg %v0 $argp                                        \
-       ARG##T(N)                                               \
+       ARG##TT(N)                                              \
        /* validate arguments */                                \
        GET##T(N, M)                                            \
        /* heap buffer in %v1 */                                \
        /* validate arguments */                                \
        GET##T(N, M)                                            \
        /* heap buffer in %v1 */                                \
@@ -258,24 +258,24 @@ test##T##_17_done:                                                \
        ret                                                     \
        epilog
 
        ret                                                     \
        epilog
 
-#define DEF(  T)                                               \
+#define DEF(  T, TT)                                           \
        DEF0( T)                                                \
        DEF0( T)                                                \
-       DEFN( 1,  0, T)                                         \
-       DEFN( 2,  1, T)                                         \
-       DEFN( 3,  2, T)                                         \
-       DEFN( 4,  3, T)                                         \
-       DEFN( 5,  4, T)                                         \
-       DEFN( 6,  5, T)                                         \
-       DEFN( 7,  6, T)                                         \
-       DEFN( 8,  7, T)                                         \
-       DEFN( 9,  8, T)                                         \
-       DEFN(10,  9, T)                                         \
-       DEFN(11, 10, T)                                         \
-       DEFN(12, 11, T)                                         \
-       DEFN(13, 12, T)                                         \
-       DEFN(14, 13, T)                                         \
-       DEFN(15, 14, T)                                         \
-       DEFN(16, 15, T)                                         \
+       DEFN( 1,  0, T, TT)                                     \
+       DEFN( 2,  1, T, TT)                                     \
+       DEFN( 3,  2, T, TT)                                     \
+       DEFN( 4,  3, T, TT)                                     \
+       DEFN( 5,  4, T, TT)                                     \
+       DEFN( 6,  5, T, TT)                                     \
+       DEFN( 7,  6, T, TT)                                     \
+       DEFN( 8,  7, T, TT)                                     \
+       DEFN( 9,  8, T, TT)                                     \
+       DEFN(10,  9, T, TT)                                     \
+       DEFN(11, 10, T, TT)                                     \
+       DEFN(12, 11, T, TT)                                     \
+       DEFN(13, 12, T, TT)                                     \
+       DEFN(14, 13, T, TT)                                     \
+       DEFN(15, 14, T, TT)                                     \
+       DEFN(16, 15, T, TT)                                     \
        DEFX(T)
 
 #define CALL(T)                        calli test##T##_17
        DEFX(T)
 
 #define CALL(T)                        calli test##T##_17
@@ -321,17 +321,17 @@ memcpy_done:
        FILLF(_f)
        FILLF(_d)
 
        FILLF(_f)
        FILLF(_d)
 
-       DEF(_c)
-       DEF(_uc)
-       DEF(_s)
-       DEF(_us)
-       DEF(_i)
+       DEF(_c, _c)
+       DEF(_uc, _c)
+       DEF(_s, _s)
+       DEF(_us, _s)
+       DEF(_i, _i)
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-       DEF(_ui)
-       DEF(_l)
+       DEF(_ui, _i)
+       DEF(_l, _l)
 #endif
 #endif
-       DEF(_f)
-       DEF(_d)
+       DEF(_f, _f)
+       DEF(_d, _d)
 
        name main
 main:
 
        name main
 main:
index 39d2209..3fb09e7 100644 (file)
@@ -1,5 +1,5 @@
 dnl
 dnl
-dnl Copyright 2000, 2001, 2002, 2012-2019 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
 dnl
 dnl This file is part of GNU lightning.
 dnl
 dnl
 dnl This file is part of GNU lightning.
 dnl
@@ -15,7 +15,7 @@ dnl License for more details.
 dnl
 
 AC_PREREQ([2.71])
 dnl
 
 AC_PREREQ([2.71])
-AC_INIT([GNU lightning],[2.1.3],[pcpa@gnu.org],[lightning])
+AC_INIT([GNU lightning],[2.2.1],[pcpa@gnu.org],[lightning])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CANONICAL_TARGET
 AC_CONFIG_SRCDIR([Makefile.am])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CANONICAL_TARGET
 AC_CONFIG_SRCDIR([Makefile.am])
@@ -60,6 +60,12 @@ case "$target_cpu" in
                fi                              ;;
            *)                                  ;;
        esac                                    ;;
                fi                              ;;
            *)                                  ;;
        esac                                    ;;
+    aarch64)
+       case "$host_os" in
+           darwin*)
+               LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DPACKED_STACK=1"                   ;;
+           *)                                  ;;
+       esac                                    ;;
     *)                                         ;;
 esac
 
     *)                                         ;;
 esac
 
@@ -163,29 +169,43 @@ if test "x$DEVEL_DISASSEMBLER" != "xno"; then
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEVEL_DISASSEMBLER=1"
 fi
 
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEVEL_DISASSEMBLER=1"
 fi
 
+# This option is only useful during development.
+AC_ARG_ENABLE(devel-get-jit-size,
+             AS_HELP_STRING([--enable-devel-get-jit-size],
+                            [Devel mode to regenerate jit size information]),
+             [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
+AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+
 AC_ARG_ENABLE(assertions,
              AS_HELP_STRING([--enable-assertions],
                             [Enable runtime code generation assertions]),
              [DEBUG=$enableval], [DEBUG=auto])
 AC_ARG_ENABLE(assertions,
              AS_HELP_STRING([--enable-assertions],
                             [Enable runtime code generation assertions]),
              [DEBUG=$enableval], [DEBUG=auto])
-if test "x$DEBUG" = xyes; then
+
+# This option might be made default in the future
+# Currently it is only useful to ensure existing code will work
+# if PACKED_STACK is also defined.
+AC_ARG_ENABLE(devel-strong-type-checking,
+             AS_HELP_STRING([--enable-devel-strong-type-checking],
+                            [Devel mode for strong type checking]),
+             [STRONG_TYPE_CHECKING=$enableval], [STRONG_TYPE_CHECKING=no])
+if test "x$DEBUG" = xyes -o x"$STRONG_TYPE_CHECKING" = xyes; then
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
 else
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
     DEBUG=no
 fi
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DDEBUG=1"
 else
     LIGHTNING_CFLAGS="$LIGHTNING_CFLAGS -DNDEBUG"
     DEBUG=no
 fi
+AM_CONDITIONAL(strong_type_checking, [test $STRONG_TYPE_CHECKING = yes])
 
 
-# This option is only useful during development.
-AC_ARG_ENABLE(devel-get-jit-size,
-             AS_HELP_STRING([--enable-devel-get-jit-size],
-                            [Devel mode to regenerate jit size information]),
-             [GET_JIT_SIZE=$enableval], [GET_JIT_SIZE=no])
-AM_CONDITIONAL(get_jit_size, [test $GET_JIT_SIZE = yes])
+AC_CHECK_LIB(dl, dlopen, [HAVE_LIBDL="yes"])
+AC_CHECK_LIB(dld, dlopen, [HAVE_LIBDLD="yes"])
 
 
-case "$host_os" in
-    *bsd*|osf*)                SHLIB=""        ;;
-    *hpux*)            SHLIB="-ldld"   ;;
-    *)                 SHLIB="-ldl"    ;;
-esac
+if test "x$HAVE_LIBDL" = xyes; then
+    SHLIB="-ldl";
+elif test "x$HAVE_LIBDLD" = xyes; then
+    SHLIB="-ldld";
+else
+    SHLIB="";
+fi
 AC_SUBST(SHLIB)
 
 cpu=
 AC_SUBST(SHLIB)
 
 cpu=
@@ -233,7 +253,7 @@ elif test $cpu = x86; then
     int main(void) {
        int                 ac, flags;
        unsigned int        eax, ebx, ecx, edx;
     int main(void) {
        int                 ac, flags;
        unsigned int        eax, ebx, ecx, edx;
-       if (__WORDSIZE == 64)
+       if (sizeof(long) == 8)
            return 1;
        __asm__ volatile ("pushfl;\n\t"
                          "popl %0;\n\t"
            return 1;
        __asm__ volatile ("pushfl;\n\t"
                          "popl %0;\n\t"
index 6398bce..4cec67e 100644 (file)
@@ -1,5 +1,5 @@
 #
 #
-# Copyright 2012-2022 Free Software Foundation, Inc.
+# Copyright 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
 #
 # This file is part of GNU lightning.
 #
@@ -14,7 +14,8 @@
 # License for more details.
 #
 
 # License for more details.
 #
 
-AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include -D_GNU_SOURCE
+AM_CFLAGS = -I $(top_builddir)/include -I$(top_srcdir)/include \
+       -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 
 info_TEXINFOS = lightning.texi
 MOSTLYCLEANFILES = lightning.tmp
 
 info_TEXINFOS = lightning.texi
 MOSTLYCLEANFILES = lightning.tmp
index 1d8d277..1bd3f67 100644 (file)
@@ -101,17 +101,30 @@ the @file{configure} shell script; to run it, merely type:
      ./configure
 @end example
 
      ./configure
 @end example
 
-@lightning{} supports the @code{--enable-disassembler} option, that
-enables linking to GNU binutils and optionally print human readable
+The @file{configure} accepts the @code{--enable-disassembler} option,
+that enables linking to GNU binutils and optionally print human readable
 disassembly of the jit code. This option can be disabled by the
 @code{--disable-disassembler} option.
 
 disassembly of the jit code. This option can be disabled by the
 @code{--disable-disassembler} option.
 
-Another option that @file{configure} accepts is
-@code{--enable-assertions}, which enables several consistency checks in
-the run-time assemblers.  These are not usually needed, so you can
-decide to simply forget about it; also remember that these consistency
+@file{configure} also accepts the @code{--enable-devel-disassembler}
+option, useful to check exactly what machine instructions were generated
+for a @lightning{} instruction. It basically mixes the output of @code{jit_print} and
+@code{jit_disassembly}.
+
+The @code{--enable-assertions} option enables several consistency
+checks in the run-time assemblers.  These are not usually needed, so you
+can decide to simply forget about it; also remember that these consistency
 checks tend to slow down your code generator.
 
 checks tend to slow down your code generator.
 
+The @code{--enable-devel-strong-type-checking} option does extra type
+checking using @code{assert}. This option also enables the
+@code{--enable-assertions} unless it is explicitly disabled.
+
+The option @code{--enable-devel-get-jit-size} should only be used
+when doing updates or maintenance to lightning. It regenerates the
+@code{jit_$ARCH-sz.c} file, creating a table of maximum bytes usage when
+translating a @lightning{} instruction to machine code.
+
 After you've configured @lightning{}, run @file{make} as usual.
 
 @lightning{} has an extensive set of tests to validate it is working
 After you've configured @lightning{}, run @file{make} as usual.
 
 @lightning{} has an extensive set of tests to validate it is working
@@ -278,12 +291,27 @@ These accept two operands, both of which must be registers.
 @example
 negr         _f  _d  O1 = -O2
 comr                 O1 = ~O2
 @example
 negr         _f  _d  O1 = -O2
 comr                 O1 = ~O2
+clor                O1 = number of leading one bits
+clzr                O1 = number of leading zero bits
+ctor                O1 = number of trailing one bits
+ctzr                O1 = number of trailing zero bits
 @end example
 
 @end example
 
+Note that @code{ctzr} is basically equivalent to the @code{C} function
+@code{ffs}, but indexed at bit zero, not one.
+
+Contrary to @code{__builtin_ctz} and @code{__builtin_clz}, an input
+value of zero is not an error, it just returns the number of bits
+in a word, 64 if @lightning{} generates 64 bit instructions, otherwise
+it returns 32.
+
+The @code{clor} and @code{ctor} are just counterparts of the versions
+that search for zero bits.
+
 These unary ALU operations are only defined for float operands.
 @example
 absr         _f  _d  O1 = fabs(O2)
 These unary ALU operations are only defined for float operands.
 @example
 absr         _f  _d  O1 = fabs(O2)
-sqrtr                O1 = sqrt(O2)
+sqrtr        _f  _d  O1 = sqrt(O2)
 @end example
 
 Besides requiring the @code{r} modifier, there are no unary operations
 @end example
 
 Besides requiring the @code{r} modifier, there are no unary operations
@@ -401,31 +429,33 @@ ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
 both cases, the first can be either a register or an immediate
 value. Values are sign-extended to fit a whole register.
 @example
 both cases, the first can be either a register or an immediate
 value. Values are sign-extended to fit a whole register.
 @example
-str     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
-sti     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *O1 = O2
-stxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
-stxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  *(O1+O2) = O3
+str     _c       _s       _i       _l  _f  _d  *O1 = O2
+sti     _c       _s       _i       _l  _f  _d  *O1 = O2
+stxr    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
+stxi    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
 @end example
 @end example
-As for the load operations, the @code{_ui} and @code{_l} types are
-only available in 64-bit architectures, and for convenience, there
-is a version without a type modifier for integer or pointer operands
-that uses the appropriate wordsize call.
+Note that the unsigned type modifiers are not available, as a store
+only writes 1, 2, 4 or 8 bytes to the memory address.
+The @code{_l} type is only available in 64-bit architectures, and for
+convenience, there is a version without a type modifier for integer or
+pointer operands that uses the appropriate wordsize call.
 
 @item Argument management
 These are:
 @example
 prepare     (not specified)
 va_start    (not specified)
 
 @item Argument management
 These are:
 @example
 prepare     (not specified)
 va_start    (not specified)
-pushargr                                   _f  _d
-pushargi                                   _f  _d
+pushargr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+pushargi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 va_push     (not specified)
 va_push     (not specified)
-arg                                        _f  _d
+arg         _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 va_arg                                         _d
 getarg      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 va_arg                                         _d
-putargr                                    _f  _d
-putargi                                    _f  _d
+putargr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+putargi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 ret         (not specified)
 ret         (not specified)
-retr                                       _f  _d
+retr        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
+reti        _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 reti                                       _f  _d
 va_end      (not specified)
 retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
 reti                                       _f  _d
 va_end      (not specified)
 retval      _c  _uc  _s  _us  _i  _ui  _l  _f  _d
@@ -444,6 +474,15 @@ the @code{pushargr} or @code{pushargi} to push the arguments @strong{in
 left to right order}; and use @code{finish} or @code{call} (explained below)
 to perform the actual call.
 
 left to right order}; and use @code{finish} or @code{call} (explained below)
 to perform the actual call.
 
+Note that @code{arg}, @code{pusharg}, @code{putarg} and @code{ret} when
+handling integer types can be used without a type modifier.
+It is suggested to use matching type modifiers for @code{arg}, @code{putarg}
+and @code{getarg}; otherwise problems will happen when generating jit for
+environments that require arguments to be truncated and zero or sign
+extended by the caller and/or excess arguments might be passed packed
+in the stack. Currently only Apple systems with @code{aarch64} cpus are
+known to have this restriction.
+
 @code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
 arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
 @code{va_push} is required when passing a @code{va_list} to another function,
 @code{va_start} returns a @code{C} compatible @code{va_list}. To fetch
 arguments, use @code{va_arg} for integers and @code{va_arg_d} for doubles.
 @code{va_push} is required when passing a @code{va_list} to another function,
@@ -565,6 +604,10 @@ bxsubr    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
 bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
 @end example
 
 bxsubi    _u          O2 -= O3@r{, goto }O1@r{ if no overflow}
 @end example
 
+Note that the @code{C} code does not have an @code{O1} argument. It is
+required to always use the return value as an argument to @code{patch},
+@code{patch_at} or @code{patch_abs}.
+
 @item Jump and return operations
 These accept one argument except @code{ret} and @code{jmpi} which
 have none; the difference between @code{finishi} and @code{calli}
 @item Jump and return operations
 These accept one argument except @code{ret} and @code{jmpi} which
 have none; the difference between @code{finishi} and @code{calli}
@@ -603,6 +646,14 @@ the next instruction, usually with a label:
 align     (not specified)                @r{align code}
 @end example
 
 align     (not specified)                @r{align code}
 @end example
 
+Similar to @code{align} is the next instruction, also usually used with
+a label:
+@example
+skip      (not specified)                @r{skip code}
+@end example
+It is used to specify a minimal number of bytes of nops to be inserted
+before the next instruction.
+
 @code{label} is normally used as @code{patch_at} argument for backward
 jumps.
 
 @code{label} is normally used as @code{patch_at} argument for backward
 jumps.
 
@@ -687,6 +738,10 @@ label2  = jit_indirect();                @rem{/* second entry point */}
           assert(addr2 - addr1 == 16);   @rem{/* only one of the addresses needs to be remembered */}
 @end example
 
           assert(addr2 - addr1 == 16);   @rem{/* only one of the addresses needs to be remembered */}
 @end example
 
+@code{skip} is useful for reserving space in the code buffer that can
+later be filled (possibly with the help of the pair of functions
+@code{jit_unprotect} and @code{jit_protect}).
+
 @item Function prolog
 
 These macros are used to set up a function prolog.  The @code{allocai}
 @item Function prolog
 
 These macros are used to set up a function prolog.  The @code{allocai}
@@ -919,7 +974,7 @@ will return non zero if the argument lives in a register. This call
 is useful to know the live range of register arguments, as those
 are very fast to read and write, but have volatile values.
 
 is useful to know the live range of register arguments, as those
 are very fast to read and write, but have volatile values.
 
-@code{callee_save_p} exects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
+@code{callee_save_p} expects a valid @code{JIT_Rn}, @code{JIT_Vn}, or
 @code{JIT_Fn}, and will return non zero if the register is callee
 save. This call is useful because on several ports, the @code{JIT_Rn}
 and @code{JIT_Fn} registers are actually callee save; no need
 @code{JIT_Fn}, and will return non zero if the register is callee
 save. This call is useful because on several ports, the @code{JIT_Rn}
 and @code{JIT_Fn} registers are actually callee save; no need
@@ -1144,26 +1199,13 @@ maps to @code{%g2} on the SPARC).
 @table @b
 @item x86_64
 @example
 @table @b
 @item x86_64
 @example
-    sub   $0x30,%rsp
-    mov   %rbp,(%rsp)
-    mov   %rsp,%rbp
-    sub   $0x18,%rsp
-    mov   %rdi,%rax            mov %rdi, %rax
-    add   $0x1,%rax            inc %rax
-    mov   %rbp,%rsp
-    mov   (%rsp),%rbp
-    add   $0x30,%rsp
-    retq                       retq
+    mov   %rdi,%rax
+    add   $0x1,%rax
+    ret
 @end example
 @end example
-In this case, the main overhead is due to the function's prolog and
-epilog, and stack alignment after reserving stack space for word
-to/from float conversions or moving data from/to x87 to/from SSE.
-Note that besides allocating space to save callee saved registers,
-no registers are saved/restored because @lightning{} notices those
-registers are not modified. There is currently no logic to detect
-if it needs to allocate stack space for type conversions neither
-proper leaf function detection, but these are subject to change
-(FIXME).
+In this case, for the x86 port, @lightning{} has simple optimizations
+to understand it is a leaf function, and that it is not required to
+create a stack frame nor update the stack pointer.
 @end table
 
 @node printf
 @end table
 
 @node printf
@@ -1327,7 +1369,7 @@ jit_node_t *compile_rpn(char *expr)
   in = jit_arg();
   stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
 
   in = jit_arg();
   stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
 
-  jit_getarg_i(JIT_R2, in);
+  jit_getarg(JIT_R2, in);
 
   while (*expr) @{
     char buf[32];
 
   while (*expr) @{
     char buf[32];
@@ -1680,6 +1722,28 @@ Get the current memory allocation function. Also, unlike the GNU GMP
 counterpart, it is an error to pass @code{NULL} pointers as arguments.
 @end deftypefun
 
 counterpart, it is an error to pass @code{NULL} pointers as arguments.
 @end deftypefun
 
+@section Protection
+Unless an alternate code buffer is used (see below), @code{jit_emit}
+sets the access protections so that the code buffer's memory can be read and
+executed, but not modified.  One can use the following functions after
+@code{jit_emit} but before @code{jit_clear} to temporarily lift the
+protection:
+
+@deftypefun void jit_unprotect ()
+Changes the access protection so that the code buffer's memory can be read and
+modified.  Before the emitted code can be invoked, @code{jit_protect}
+has to be called to reset the change.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
+@deftypefun void jit_protect ()
+Changes the access protection so that the code buffer's memory can be read and
+executed.
+
+This procedure has no effect when an alternate code buffer (see below) is used.
+@end deftypefun
+
 @section Alternate code buffer
 To instruct @lightning{} to use an alternate code buffer it is required
 to call @code{jit_realize} before @code{jit_emit}, and then query states
 @section Alternate code buffer
 To instruct @lightning{} to use an alternate code buffer it is required
 to call @code{jit_realize} before @code{jit_emit}, and then query states
index 8131484..edb3d8e 100644 (file)
@@ -24,7 +24,7 @@ jit_node_t *compile_rpn(char *expr)
 
   fn = jit_note(NULL, 0);
   jit_prolog();
 
   fn = jit_note(NULL, 0);
   jit_prolog();
-  in = jit_arg();
+  in = jit_arg_i();
   stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
 
   jit_getarg_i(JIT_R2, in);
   stack_ptr = stack_base = jit_allocai (32 * sizeof (int));
 
   jit_getarg_i(JIT_R2, in);
index ce622e2..bd48777 100644 (file)
@@ -1,5 +1,5 @@
 #
 #
-# Copyright 2000, 2001, 2002, 2012-2022 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012-2023 Free Software Foundation, Inc.
 #
 # This file is part of GNU lightning.
 #
 #
 # This file is part of GNU lightning.
 #
index 67c6af1..7aa654c 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -190,6 +190,8 @@ typedef enum {
 #define jit_align(u)           jit_new_node_w(jit_code_align, u)
     jit_code_live,             jit_code_align,
     jit_code_save,             jit_code_load,
 #define jit_align(u)           jit_new_node_w(jit_code_align, u)
     jit_code_live,             jit_code_align,
     jit_code_save,             jit_code_load,
+#define jit_skip(u)             jit_new_node_w(jit_code_skip, u)
+    jit_code_skip,
 #define jit_name(u)            _jit_name(_jit,u)
     jit_code_name,
 #define jit_note(u, v)         _jit_note(_jit, u, v)
 #define jit_name(u)            _jit_name(_jit,u)
     jit_code_name,
 #define jit_note(u, v)         _jit_note(_jit, u, v)
@@ -210,27 +212,80 @@ typedef enum {
 #define jit_allocar(u, v)      _jit_allocar(_jit,u,v)
     jit_code_allocai,          jit_code_allocar,
 
 #define jit_allocar(u, v)      _jit_allocar(_jit,u,v)
     jit_code_allocai,          jit_code_allocar,
 
-#define jit_arg()              _jit_arg(_jit)
-    jit_code_arg,
+#define jit_arg_c()            _jit_arg(_jit, jit_code_arg_c)
+#define jit_arg_s()            _jit_arg(_jit, jit_code_arg_s)
+#define jit_arg_i()            _jit_arg(_jit, jit_code_arg_i)
+# if __WORDSIZE == 32
+#  define jit_arg()            jit_arg_i()
+#else
+#  define jit_arg_l()          _jit_arg(_jit, jit_code_arg_l)
+#  define jit_arg()            jit_arg_l()
+#endif
+    jit_code_arg_c,            jit_code_arg_s,
+    jit_code_arg_i,            jit_code_arg_l,
+#if __WORDSIZE == 32
+#  define jit_code_arg         jit_code_arg_i
+#else
+#  define jit_code_arg         jit_code_arg_l
+#endif
+
 #define jit_getarg_c(u,v)      _jit_getarg_c(_jit,u,v)
 #define jit_getarg_uc(u,v)     _jit_getarg_uc(_jit,u,v)
 #define jit_getarg_c(u,v)      _jit_getarg_c(_jit,u,v)
 #define jit_getarg_uc(u,v)     _jit_getarg_uc(_jit,u,v)
-    jit_code_getarg_c,         jit_code_getarg_uc,
 #define jit_getarg_s(u,v)      _jit_getarg_s(_jit,u,v)
 #define jit_getarg_us(u,v)     _jit_getarg_us(_jit,u,v)
 #define jit_getarg_s(u,v)      _jit_getarg_s(_jit,u,v)
 #define jit_getarg_us(u,v)     _jit_getarg_us(_jit,u,v)
-    jit_code_getarg_s,         jit_code_getarg_us,
 #define jit_getarg_i(u,v)      _jit_getarg_i(_jit,u,v)
 #if __WORDSIZE == 32
 #  define jit_getarg(u,v)      jit_getarg_i(u,v)
 #else
 #define jit_getarg_i(u,v)      _jit_getarg_i(_jit,u,v)
 #if __WORDSIZE == 32
 #  define jit_getarg(u,v)      jit_getarg_i(u,v)
 #else
-#  define jit_getarg(u,v)      jit_getarg_l(u,v)
 #  define jit_getarg_ui(u,v)   _jit_getarg_ui(_jit,u,v)
 #  define jit_getarg_l(u,v)    _jit_getarg_l(_jit,u,v)
 #  define jit_getarg_ui(u,v)   _jit_getarg_ui(_jit,u,v)
 #  define jit_getarg_l(u,v)    _jit_getarg_l(_jit,u,v)
+#  define jit_getarg(u,v)      jit_getarg_l(u,v)
 #endif
 #endif
+    jit_code_getarg_c,         jit_code_getarg_uc,
+    jit_code_getarg_s,         jit_code_getarg_us,
     jit_code_getarg_i,         jit_code_getarg_ui,
     jit_code_getarg_l,
     jit_code_getarg_i,         jit_code_getarg_ui,
     jit_code_getarg_l,
-#  define jit_putargr(u,v)     _jit_putargr(_jit,u,v)
-#  define jit_putargi(u,v)     _jit_putargi(_jit,u,v)
-    jit_code_putargr,          jit_code_putargi,
+#if __WORDSIZE == 32
+#  define jit_code_getarg      jit_code_getarg_i
+#else
+#  define jit_code_getarg      jit_code_getarg_l
+#endif
+
+#define jit_putargr_c(u,v)     _jit_putargr(_jit,u,v,jit_code_putargr_c)
+#define jit_putargi_c(u,v)     _jit_putargi(_jit,u,v,jit_code_putargi_c)
+#define jit_putargr_uc(u,v)    _jit_putargr(_jit,u,v,jit_code_putargr_uc)
+#define jit_putargi_uc(u,v)    _jit_putargi(_jit,u,v,jit_code_putargi_uc)
+#define jit_putargr_s(u,v)     _jit_putargr(_jit,u,v,jit_code_putargr_s)
+#define jit_putargi_s(u,v)     _jit_putargi(_jit,u,v,jit_code_putargi_s)
+#define jit_putargr_us(u,v)    _jit_putargr(_jit,u,v,jit_code_putargr_us)
+#define jit_putargi_us(u,v)    _jit_putargi(_jit,u,v,jit_code_putargi_us)
+#define jit_putargr_i(u,v)     _jit_putargr(_jit,u,v,jit_code_putargr_i)
+#define jit_putargi_i(u,v)     _jit_putargi(_jit,u,v,jit_code_putargi_i)
+#if __WORDSIZE == 32
+#  define jit_putargr(u,v)     jit_putargr_i(u,v)
+#  define jit_putargi(u,v)     jit_putargi_i(u,v)
+#else
+#  define jit_putargr_ui(u,v)  _jit_putargr(_jit,u,v,jit_code_putargr_ui)
+#  define jit_putargi_ui(u,v)  _jit_putargi(_jit,u,v,jit_code_putargi_ui)
+#  define jit_putargr_l(u,v)   _jit_putargr(_jit,u,v,jit_code_putargr_l)
+#  define jit_putargi_l(u,v)   _jit_putargi(_jit,u,v,jit_code_putargi_l)
+#  define jit_putargr(u,v)     jit_putargr_l(u,v)
+#  define jit_putargi(u,v)     jit_putargi_l(u,v)
+#endif
+    jit_code_putargr_c,                jit_code_putargi_c,
+    jit_code_putargr_uc,       jit_code_putargi_uc,
+    jit_code_putargr_s,                jit_code_putargi_s,
+    jit_code_putargr_us,       jit_code_putargi_us,
+    jit_code_putargr_i,                jit_code_putargi_i,
+    jit_code_putargr_ui,       jit_code_putargi_ui,
+    jit_code_putargr_l,                jit_code_putargi_l,
+#if __WORDSIZE == 32
+#  define jit_code_putargr     jit_code_putargr_i
+#  define jit_code_putargi     jit_code_putargi_i
+#else
+#  define jit_code_putargr     jit_code_putargr_l
+#  define jit_code_putargi     jit_code_putargi_l
+#endif
 
 #define jit_va_start(u)                jit_new_node_w(jit_code_va_start, u)
     jit_code_va_start,
 
 #define jit_va_start(u)                jit_new_node_w(jit_code_va_start, u)
     jit_code_va_start,
@@ -352,6 +407,10 @@ typedef enum {
 #define jit_movzr(u,v,w)       jit_new_node_www(jit_code_movzr,u,v,w)
     jit_code_movnr,            jit_code_movzr,
 
 #define jit_movzr(u,v,w)       jit_new_node_www(jit_code_movzr,u,v,w)
     jit_code_movnr,            jit_code_movzr,
 
+    jit_code_casr,             jit_code_casi,
+#define jit_casr(u, v, w, x)   jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x)   jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
 #define jit_extr_c(u,v)                jit_new_node_ww(jit_code_extr_c,u,v)
 #define jit_extr_uc(u,v)       jit_new_node_ww(jit_code_extr_uc,u,v)
     jit_code_extr_c,           jit_code_extr_uc,
 #define jit_extr_c(u,v)                jit_new_node_ww(jit_code_extr_c,u,v)
 #define jit_extr_uc(u,v)       jit_new_node_ww(jit_code_extr_uc,u,v)
     jit_code_extr_c,           jit_code_extr_uc,
@@ -364,6 +423,18 @@ typedef enum {
 #endif
     jit_code_extr_i,           jit_code_extr_ui,
 
 #endif
     jit_code_extr_i,           jit_code_extr_ui,
 
+#define jit_bswapr_us(u,v)     jit_new_node_ww(jit_code_bswapr_us,u,v)
+    jit_code_bswapr_us,
+#define jit_bswapr_ui(u,v)     jit_new_node_ww(jit_code_bswapr_ui,u,v)
+    jit_code_bswapr_ui,
+#define jit_bswapr_ul(u,v)     jit_new_node_ww(jit_code_bswapr_ul,u,v)
+    jit_code_bswapr_ul,
+#if __WORDSIZE == 32
+#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#else
+#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
+#endif
+
 #define jit_htonr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
 #define jit_ntohr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
     jit_code_htonr_us,
 #define jit_htonr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
 #define jit_ntohr_us(u,v)      jit_new_node_ww(jit_code_htonr_us,u,v)
     jit_code_htonr_us,
@@ -550,33 +621,106 @@ typedef enum {
 
 #define jit_prepare()          _jit_prepare(_jit)
     jit_code_prepare,
 
 #define jit_prepare()          _jit_prepare(_jit)
     jit_code_prepare,
-#define jit_pushargr(u)                _jit_pushargr(_jit,u)
-#define jit_pushargi(u)                _jit_pushargi(_jit,u)
-    jit_code_pushargr,         jit_code_pushargi,
+
+#define jit_pushargr_c(u)      _jit_pushargr(_jit,u,jit_code_pushargr_c)
+#define jit_pushargi_c(u)      _jit_pushargi(_jit,u,jit_code_pushargi_c)
+#define jit_pushargr_uc(u)     _jit_pushargr(_jit,u,jit_code_pushargr_uc)
+#define jit_pushargi_uc(u)     _jit_pushargi(_jit,u,jit_code_pushargi_uc)
+#define jit_pushargr_s(u)      _jit_pushargr(_jit,u,jit_code_pushargr_s)
+#define jit_pushargi_s(u)      _jit_pushargi(_jit,u,jit_code_pushargi_s)
+#define jit_pushargr_us(u)     _jit_pushargr(_jit,u,jit_code_pushargr_us)
+#define jit_pushargi_us(u)     _jit_pushargi(_jit,u,jit_code_pushargi_us)
+#define jit_pushargr_i(u)      _jit_pushargr(_jit,u,jit_code_pushargr_i)
+#define jit_pushargi_i(u)      _jit_pushargi(_jit,u,jit_code_pushargi_i)
+#if __WORDSIZE == 32
+#  define jit_pushargr(u)      jit_pushargr_i(u)
+#  define jit_pushargi(u)      jit_pushargi_i(u)
+#else
+#  define jit_pushargr_ui(u)   _jit_pushargr(_jit,u,jit_code_pushargr_ui)
+#  define jit_pushargi_ui(u)   _jit_pushargi(_jit,u,jit_code_pushargi_ui)
+#  define jit_pushargr_l(u)    _jit_pushargr(_jit,u,jit_code_pushargr_l)
+#  define jit_pushargi_l(u)    _jit_pushargi(_jit,u,jit_code_pushargi_l)
+#  define jit_pushargr(u)      jit_pushargr_l(u)
+#  define jit_pushargi(u)      jit_pushargi_l(u)
+#endif
+    jit_code_pushargr_c,       jit_code_pushargi_c,
+    jit_code_pushargr_uc,      jit_code_pushargi_uc,
+    jit_code_pushargr_s,       jit_code_pushargi_s,
+    jit_code_pushargr_us,      jit_code_pushargi_us,
+    jit_code_pushargr_i,       jit_code_pushargi_i,
+    jit_code_pushargr_ui,      jit_code_pushargi_ui,
+    jit_code_pushargr_l,       jit_code_pushargi_l,
+#if __WORDSIZE == 32
+#  define jit_code_pushargr    jit_code_pushargr_i
+#  define jit_code_pushargi    jit_code_pushargi_i
+#else
+#  define jit_code_pushargr    jit_code_pushargr_l
+#  define jit_code_pushargi    jit_code_pushargi_l
+#endif
+
 #define jit_finishr(u)         _jit_finishr(_jit,u)
 #define jit_finishi(u)         _jit_finishi(_jit,u)
     jit_code_finishr,          jit_code_finishi,
 #define jit_ret()              _jit_ret(_jit)
     jit_code_ret,
 #define jit_finishr(u)         _jit_finishr(_jit,u)
 #define jit_finishi(u)         _jit_finishi(_jit,u)
     jit_code_finishr,          jit_code_finishi,
 #define jit_ret()              _jit_ret(_jit)
     jit_code_ret,
-#define jit_retr(u)            _jit_retr(_jit,u)
-#define jit_reti(u)            _jit_reti(_jit,u)
-    jit_code_retr,             jit_code_reti,
+
+#define jit_retr_c(u)          _jit_retr(_jit,u,jit_code_retr_c)
+#define jit_reti_c(u)          _jit_reti(_jit,u,jit_code_reti_c)
+#define jit_retr_uc(u)         _jit_retr(_jit,u,jit_code_retr_uc)
+#define jit_reti_uc(u)         _jit_reti(_jit,u,jit_code_reti_uc)
+#define jit_retr_s(u)          _jit_retr(_jit,u,jit_code_retr_s)
+#define jit_reti_s(u)          _jit_reti(_jit,u,jit_code_reti_s)
+#define jit_retr_us(u)         _jit_retr(_jit,u,jit_code_retr_us)
+#define jit_reti_us(u)         _jit_reti(_jit,u,jit_code_reti_us)
+#define jit_retr_i(u)          _jit_retr(_jit,u,jit_code_retr_i)
+#define jit_reti_i(u)          _jit_reti(_jit,u,jit_code_reti_i)
+#if __WORDSIZE == 32
+#  define jit_retr(u)          jit_retr_i(u)
+#  define jit_reti(u)          jit_reti_i(u)
+#else
+#  define jit_retr_ui(u)       _jit_retr(_jit,u,jit_code_retr_ui)
+#  define jit_reti_ui(u)       _jit_reti(_jit,u,jit_code_reti_ui)
+#  define jit_retr_l(u)                _jit_retr(_jit,u,jit_code_retr_l)
+#  define jit_reti_l(u)                _jit_reti(_jit,u,jit_code_reti_l)
+#  define jit_retr(u)          jit_retr_l(u)
+#  define jit_reti(u)          jit_reti_l(u)
+#endif
+    jit_code_retr_c,           jit_code_reti_c,
+    jit_code_retr_uc,          jit_code_reti_uc,
+    jit_code_retr_s,           jit_code_reti_s,
+    jit_code_retr_us,          jit_code_reti_us,
+    jit_code_retr_i,           jit_code_reti_i,
+    jit_code_retr_ui,          jit_code_reti_ui,
+    jit_code_retr_l,           jit_code_reti_l,
+#if __WORDSIZE == 32
+#  define jit_code_retr                jit_code_retr_i
+#  define jit_code_reti                jit_code_reti_i
+#else
+#  define jit_code_retr                jit_code_retr_l
+#  define jit_code_reti                jit_code_reti_l
+#endif
+
 #define jit_retval_c(u)                _jit_retval_c(_jit,u)
 #define jit_retval_uc(u)       _jit_retval_uc(_jit,u)
 #define jit_retval_c(u)                _jit_retval_c(_jit,u)
 #define jit_retval_uc(u)       _jit_retval_uc(_jit,u)
-    jit_code_retval_c,         jit_code_retval_uc,
 #define jit_retval_s(u)                _jit_retval_s(_jit,u)
 #define jit_retval_us(u)       _jit_retval_us(_jit,u)
 #define jit_retval_s(u)                _jit_retval_s(_jit,u)
 #define jit_retval_us(u)       _jit_retval_us(_jit,u)
-    jit_code_retval_s,         jit_code_retval_us,
 #define jit_retval_i(u)                _jit_retval_i(_jit,u)
 #if __WORDSIZE == 32
 #  define jit_retval(u)                jit_retval_i(u)
 #else
 #define jit_retval_i(u)                _jit_retval_i(_jit,u)
 #if __WORDSIZE == 32
 #  define jit_retval(u)                jit_retval_i(u)
 #else
-#  define jit_retval(u)                jit_retval_l(u)
 #  define jit_retval_ui(u)     _jit_retval_ui(_jit,u)
 #  define jit_retval_l(u)      _jit_retval_l(_jit,u)
 #  define jit_retval_ui(u)     _jit_retval_ui(_jit,u)
 #  define jit_retval_l(u)      _jit_retval_l(_jit,u)
+#  define jit_retval(u)                jit_retval_l(u)
 #endif
 #endif
+    jit_code_retval_c,         jit_code_retval_uc,
+    jit_code_retval_s,         jit_code_retval_us,
     jit_code_retval_i,         jit_code_retval_ui,
     jit_code_retval_l,
     jit_code_retval_i,         jit_code_retval_ui,
     jit_code_retval_l,
+#if __WORDSIZE == 32
+#  define jit_code_retval      jit_code_retval_i
+#else
+#  define jit_code_retval      jit_code_retval_l
+#endif
 
 #define jit_epilog()           _jit_epilog(_jit)
     jit_code_epilog,
 
 #define jit_epilog()           _jit_epilog(_jit)
     jit_code_epilog,
@@ -904,21 +1048,13 @@ typedef enum {
 #define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
 #define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
 
 #define jit_movr_d_w(u, v)     jit_new_node_ww(jit_code_movr_d_w, u, v)
 #define jit_movi_d_w(u, v)     jit_new_node_wd(jit_code_movi_d_w, u, v)
 
-#define jit_bswapr_us(u,v)     jit_new_node_ww(jit_code_bswapr_us,u,v)
-    jit_code_bswapr_us,
-#define jit_bswapr_ui(u,v)     jit_new_node_ww(jit_code_bswapr_ui,u,v)
-    jit_code_bswapr_ui,
-#define jit_bswapr_ul(u,v)     jit_new_node_ww(jit_code_bswapr_ul,u,v)
-    jit_code_bswapr_ul,
-#if __WORDSIZE == 32
-#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ui,u,v)
-#else
-#define jit_bswapr(u,v)                jit_new_node_ww(jit_code_bswapr_ul,u,v)
-#endif
+#define jit_clor(u,v)          jit_new_node_ww(jit_code_clor,u,v)
+#define jit_clzr(u,v)          jit_new_node_ww(jit_code_clzr,u,v)
+    jit_code_clor,             jit_code_clzr,
 
 
-    jit_code_casr,             jit_code_casi,
-#define jit_casr(u, v, w, x)   jit_new_node_wwq(jit_code_casr, u, v, w, x)
-#define jit_casi(u, v, w, x)   jit_new_node_wwq(jit_code_casi, u, v, w, x)
+#define jit_ctor(u,v)          jit_new_node_ww(jit_code_ctor,u,v)
+#define jit_ctzr(u,v)          jit_new_node_ww(jit_code_ctzr,u,v)
+    jit_code_ctor,             jit_code_ctzr,
 
     jit_code_last_code
 } jit_code_t;
 
     jit_code_last_code
 } jit_code_t;
@@ -960,7 +1096,8 @@ extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t);
 extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
 extern void _jit_ellipsis(jit_state_t*);
 
 extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
 extern void _jit_ellipsis(jit_state_t*);
 
-extern jit_node_t *_jit_arg(jit_state_t*);
+extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t);
+
 extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
@@ -970,19 +1107,24 @@ extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
 #endif
 extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
 extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
 #endif
-extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*);
-extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*);
+
+extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t);
+extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t);
 
 extern void _jit_prepare(jit_state_t*);
 extern void _jit_ellipsis(jit_state_t*);
 extern void _jit_va_push(jit_state_t*, jit_gpr_t);
 
 extern void _jit_prepare(jit_state_t*);
 extern void _jit_ellipsis(jit_state_t*);
 extern void _jit_va_push(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargr(jit_state_t*, jit_gpr_t);
-extern void _jit_pushargi(jit_state_t*, jit_word_t);
+
+extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t);
+
 extern void _jit_finishr(jit_state_t*, jit_gpr_t);
 extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
 extern void _jit_ret(jit_state_t*);
 extern void _jit_finishr(jit_state_t*, jit_gpr_t);
 extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
 extern void _jit_ret(jit_state_t*);
-extern void _jit_retr(jit_state_t*, jit_gpr_t);
-extern void _jit_reti(jit_state_t*, jit_word_t);
+
+extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t);
+
 extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
@@ -992,6 +1134,7 @@ extern void _jit_retval_i(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
 #endif
 extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
 extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
 #endif
+
 extern void _jit_epilog(jit_state_t*);
 
 #define jit_patch(u)           _jit_patch(_jit,u)
 extern void _jit_epilog(jit_state_t*);
 
 #define jit_patch(u)           _jit_patch(_jit,u)
@@ -1016,6 +1159,10 @@ extern void _jit_frame(jit_state_t*, jit_int32_t);
 extern void _jit_tramp(jit_state_t*, jit_int32_t);
 #define jit_emit()             _jit_emit(_jit)
 extern jit_pointer_t _jit_emit(jit_state_t*);
 extern void _jit_tramp(jit_state_t*, jit_int32_t);
 #define jit_emit()             _jit_emit(_jit)
 extern jit_pointer_t _jit_emit(jit_state_t*);
+#define jit_unprotect()         _jit_unprotect(_jit)
+extern void _jit_unprotect(jit_state_t*);
+#define jit_protect()           _jit_protect(_jit)
+extern void _jit_protect(jit_state_t*);
 
 #define jit_print()            _jit_print(_jit)
 extern void _jit_print(jit_state_t*);
 
 #define jit_print()            _jit_print(_jit)
 extern void _jit_print(jit_state_t*);
index 6a435f1..3086499 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
 #define JIT_HASH_CONSTS                0
 #define JIT_NUM_OPERANDS       3
 
 #define JIT_HASH_CONSTS                0
 #define JIT_NUM_OPERANDS       3
 
+#if __APPLE__
+#  define PACKED_STACK         1
+#endif
+
 /*
  * Types
  */
 /*
  * Types
  */
index 3593431..7986c34 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index 8f7278d..0ed9535 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -106,6 +106,9 @@ typedef enum {
 
 typedef struct {
     jit_uint32_t version       : 4;
 
 typedef struct {
     jit_uint32_t version       : 4;
+    /* This field was originally used only for the 'e' in armv5te.
+     * It can also be used to force hardware division: when version
+     * is set to 7, it indicates armv7r or better. */
     jit_uint32_t extend                : 1;
     /* only generate thumb instructions for thumb2 */
     jit_uint32_t thumb         : 1;
     jit_uint32_t extend                : 1;
     /* only generate thumb instructions for thumb2 */
     jit_uint32_t thumb         : 1;
@@ -117,6 +120,12 @@ typedef struct {
      * due to some memory ordering constraint not being respected, so,
      * disable by default */
     jit_uint32_t ldrt_strt     : 1;
      * due to some memory ordering constraint not being respected, so,
      * disable by default */
     jit_uint32_t ldrt_strt     : 1;
+    /* Assume called functions never match the jit instruction set?
+     * That is, libc, gmp, mpfr, etc. functions are in thumb mode and the
+     * jit is in arm mode, or the reverse, which may cause a crash upon
+     * return from that function if the jit generated a relative jump.
+     */
+    jit_uint32_t exchange      : 1;
 } jit_cpu_t;
 
 /*
 } jit_cpu_t;
 
 /*
index afdf21d..df361ba 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index 7b212b9..e45818a 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -117,4 +117,13 @@ typedef enum {
     _NOREG,
 } jit_reg_t;
 
     _NOREG,
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t clz           : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_ia64_h */
 #endif /* _jit_ia64_h */
index 44982ec..89b1a86 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index a2388c9..52aebcc 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -25,6 +25,8 @@
 
 #if _MIPS_SIM != _ABIO32
 #    define NEW_ABI            1
 
 #if _MIPS_SIM != _ABIO32
 #    define NEW_ABI            1
+#else
+#    define NEW_ABI            0
 #endif
 
 /*
 #endif
 
 /*
@@ -114,4 +116,13 @@ typedef enum {
     _NOREG,
 } jit_reg_t;
 
     _NOREG,
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t release       : 4;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_mips_h */
 #endif /* _jit_mips_h */
index d3d25d3..460c491 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -22,6 +22,9 @@
 
 #define JIT_HASH_CONSTS                1
 #define JIT_NUM_OPERANDS       3
 
 #define JIT_HASH_CONSTS                1
 #define JIT_NUM_OPERANDS       3
+#if defined(_AIX) && !defined(_CALL_AIX) && !defined(_CALL_LINUX)
+#  define _CALL_AIXDESC                1
+#endif
 
 /*
  * Types
 
 /*
  * Types
index d0420b8..444a295 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
 #  define HIDDEN               /**/
 #endif
 
 #  define HIDDEN               /**/
 #endif
 
+#if PACKED_STACK || STRONG_TYPE_CHECKING
+#  define assert_arg_type(code, expect)                                        \
+    do assert((code) == (expect)); while (0)
+#  define assert_putarg_type(code, expect)                             \
+    do                                                                 \
+       assert((((code) - jit_code_putargr_c) >> 2) ==                  \
+              ((expect) - jit_code_arg_c));                            \
+    while (0)
+#else
+#  define assert_arg_type(code, expect)                                        \
+    do assert((int)(code) == (int)(expect) ||                          \
+             (code) == jit_code_arg); while (0)
+#  define assert_putarg_type(code, expect)                             \
+    do                                                                 \
+       assert(((((code) - jit_code_putargr_c) >> 2) ==                 \
+              ((expect) - jit_code_arg_c)) ||                          \
+              ((code) == jit_code_arg));                               \
+    while (0)
+#endif
+
 #define rc(value)              jit_class_##value
 #define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
 
 #define rc(value)              jit_class_##value
 #define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
 
@@ -174,48 +194,80 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
     (!jit_regset_tstbit(&_jitc->regarg, regno) &&                      \
      !jit_regset_tstbit(&_jitc->regsav, regno))
 
     (!jit_regset_tstbit(&_jitc->regarg, regno) &&                      \
      !jit_regset_tstbit(&_jitc->regsav, regno))
 
-#define jit_inc_synth(code)                                            \
+#define jit_code_inc_synth(code)                                       \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node(jit_code_##code);                            \
+       (void)jit_new_node(code);                                       \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_w(code, u)                                       \
+#define jit_inc_synth(name)                                            \
+    jit_code_inc_synth(jit_code_##name)
+#define jit_code_inc_synth_w(code, u)                                  \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_w(jit_code_##code, u);                       \
+       (void)jit_new_node_w(code, u);                                  \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_f(code, u)                                       \
+#define jit_inc_synth_w(name, u)                                       \
+    jit_code_inc_synth_w(jit_code_##name, u)
+#define jit_code_inc_synth_f(code, u)                                  \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_f(jit_code_##code, u);                       \
+       (void)jit_new_node_f(code, u);                                  \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_d(code, u)                                       \
+#define jit_inc_synth_f(name, u)                                       \
+    jit_code_inc_synth_f(jit_code_##name, u)
+#define jit_code_inc_synth_d(code, u)                                  \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_d(jit_code_##code, u);                       \
+       (void)jit_new_node_d(code, u);                                  \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_ww(code, u, v)                                   \
+#define jit_inc_synth_d(name, u)                                       \
+    jit_code_inc_synth_d(jit_code_##name, u)
+#define jit_code_inc_synth_ww(code, u, v)                              \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_ww(jit_code_##code, u, v);                   \
+       (void)jit_new_node_ww(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_wp(code, u, v)                                   \
+#define jit_inc_synth_ww(name, u, v)                                   \
+    jit_code_inc_synth_ww(jit_code_##name, u, v)
+#define jit_code_inc_synth_wp(code, u, v)                              \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_wp(jit_code_##code, u, v);                   \
+       (void)jit_new_node_wp(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_fp(code, u, v)                                   \
+#define jit_inc_synth_wp(name, u, v)                                   \
+    jit_code_inc_synth_wp(jit_code_##name, u, v)
+#define jit_code_inc_synth_fp(code, u, v)                              \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_fp(jit_code_##code, u, v);                   \
+       (void)jit_new_node_fp(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
-#define jit_inc_synth_dp(code, u, v)                                   \
+#define jit_inc_synth_fp(name, u, v)                                   \
+    jit_code_inc_synth_fp(jit_code_##name, u, v)
+#define jit_code_inc_synth_dp(code, u, v)                              \
     do {                                                               \
     do {                                                               \
-       (void)jit_new_node_dp(jit_code_##code, u, v);                   \
+       (void)jit_new_node_dp(code, u, v);                              \
        jit_synth_inc();                                                \
     } while (0)
        jit_synth_inc();                                                \
     } while (0)
+#define jit_inc_synth_dp(name, u, v)                                   \
+    jit_code_inc_synth_dp(jit_code_##name, u, v)
 #define jit_dec_synth()                jit_synth_dec()
 
 #define jit_dec_synth()                jit_synth_dec()
 
+#define jit_link_alist(node)                                           \
+    do {                                                               \
+       node->link = _jitc->function->alist;                            \
+       _jitc->function->alist = node;                                  \
+    } while (0)
+#define jit_check_frame()                                              \
+    do {                                                               \
+       if (!_jitc->function->need_frame) {                             \
+           _jitc->again = 1;                                           \
+           _jitc->function->need_frame = 1;                            \
+       }                                                               \
+    } while (0)
+#define jit_diffsize() (stack_framesize - _jitc->framesize)
+#define jit_framesize()        (stack_framesize - jit_diffsize())
+#define jit_selfsize() (_jitc->function->self.size - jit_diffsize())
+
 #define jit_link_prolog()                                              \
     do {                                                               \
        _jitc->tail->link = _jitc->function->prolog->link;              \
 #define jit_link_prolog()                                              \
     do {                                                               \
        _jitc->tail->link = _jitc->function->prolog->link;              \
@@ -248,8 +300,8 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
 #define jit_class_xpr          0x80000000      /* float / vector */
 /* Used on sparc64 where %f0-%f31 can be encode for single float
  * but %f32 to %f62 only as double precision */
 #define jit_class_xpr          0x80000000      /* float / vector */
 /* Used on sparc64 where %f0-%f31 can be encode for single float
  * but %f32 to %f62 only as double precision */
-#define jit_class_sng          0x10000000      /* Single precision float */
-#define jit_class_dbl          0x20000000      /* Only double precision float */
+#define jit_class_sng          0x00010000      /* Single precision float */
+#define jit_class_dbl          0x00020000      /* Only double precision float */
 #define jit_regno_patch                0x00008000      /* this is a register
                                                 * returned by a "user" call
                                                 * to jit_get_reg() */
 #define jit_regno_patch                0x00008000      /* this is a register
                                                 * returned by a "user" call
                                                 * to jit_get_reg() */
@@ -474,9 +526,14 @@ struct jit_function {
     } call;
     jit_node_t         *prolog;
     jit_node_t         *epilog;
     } call;
     jit_node_t         *prolog;
     jit_node_t         *epilog;
+    jit_node_t         *alist;
     jit_int32_t                *regoff;
     jit_regset_t        regset;
     jit_int32_t                 stack;
     jit_int32_t                *regoff;
     jit_regset_t        regset;
     jit_int32_t                 stack;
+#if defined(__i386__) || defined(__x86_64__)
+    jit_int32_t                 cvt_offset;    /* allocai'd offset for x87<->xmm or
+                                        * fpr<->gpr transfer using the stack */
+#endif
 
     /* Helper for common jit generation pattern, used in GNU Smalltalk
      * and possibly others, where a static frame layout is required or
 
     /* Helper for common jit generation pattern, used in GNU Smalltalk
      * and possibly others, where a static frame layout is required or
@@ -485,11 +542,25 @@ struct jit_function {
     jit_uint32_t        define_frame : 1;
     jit_uint32_t        assume_frame : 1;
 
     jit_uint32_t        define_frame : 1;
     jit_uint32_t        assume_frame : 1;
 
+    jit_uint32_t        need_frame : 1;        /* need frame pointer? */
+    jit_uint32_t        need_stack : 1;        /* need stack pointer? */
+    jit_uint32_t        need_return : 1;       /* not a leaf function */
+
     /* alloca offset offset */
     jit_int32_t                 aoffoff;
     /* uses allocar flag */
     jit_uint32_t        allocar : 1;
 
     /* alloca offset offset */
     jit_int32_t                 aoffoff;
     /* uses allocar flag */
     jit_uint32_t        allocar : 1;
 
+#if __arm__
+    /* If will, or might use float registers and vfp is not available.
+     * Use the first 64 bytes always, as the access to the virtual float
+     * registers use hardcoded instructions that can only reach 64 byte
+     * displacements, and to keep code simpler, do not use temporaries. */
+    jit_uint32_t         swf_offset : 1;
+    /* If need to call C functions for some operation, or variadic function */
+    jit_uint32_t         save_reg_args : 1;
+#endif
+
     /* varargs state offsets */
     jit_int32_t                 vaoff;         /* offset of jit_va_list */
     jit_int32_t                 vagp;          /* first gp va argument */
     /* varargs state offsets */
     jit_int32_t                 vaoff;         /* offset of jit_va_list */
     jit_int32_t                 vagp;          /* first gp va argument */
@@ -509,6 +580,13 @@ struct jit_compiler {
     jit_int32_t                  rout;         /* first output register */
     jit_int32_t                  breg;         /* base register for prolog/epilog */
 #endif
     jit_int32_t                  rout;         /* first output register */
     jit_int32_t                  breg;         /* base register for prolog/epilog */
 #endif
+#if __mips__
+    struct {
+       jit_int32_t       op;           /* pending instruction, candidate
+                                        * to be inserted in a delay slot */
+       jit_bool_t        pend;         /* non zero if need to emit op */
+    } inst;
+#endif
 #if __mips__ || __ia64__ || __alpha__ || \
        (__sparc__ && __WORDSIZE == 64) || __riscv || __loongarch__
     jit_int32_t                  carry;
 #if __mips__ || __ia64__ || __alpha__ || \
        (__sparc__ && __WORDSIZE == 64) || __riscv || __loongarch__
     jit_int32_t                  carry;
@@ -528,11 +606,14 @@ struct jit_compiler {
 #endif
     jit_uint32_t         no_data : 1;
     jit_uint32_t         no_note : 1;
 #endif
     jit_uint32_t         no_data : 1;
     jit_uint32_t         no_note : 1;
+    jit_int32_t                  framesize;    /* space for callee save registers,
+                                        * frame pointer and return address */
     jit_int32_t                  reglen;       /* number of registers */
     jit_regset_t         regarg;       /* cannot allocate */
     jit_regset_t         regsav;       /* automatic spill only once */
     jit_regset_t         reglive;      /* known live registers at some point */
     jit_regset_t         regmask;      /* register mask to update reglive */
     jit_int32_t                  reglen;       /* number of registers */
     jit_regset_t         regarg;       /* cannot allocate */
     jit_regset_t         regsav;       /* automatic spill only once */
     jit_regset_t         reglive;      /* known live registers at some point */
     jit_regset_t         regmask;      /* register mask to update reglive */
+    jit_regset_t         explive;      /* explicitly marked as live */
     struct {
        jit_uint8_t      *end;
     } code;
     struct {
        jit_uint8_t      *end;
     } code;
@@ -657,6 +738,8 @@ struct jit_state {
     struct {
        jit_uint8_t     *ptr;
        jit_word_t       length;
     struct {
        jit_uint8_t     *ptr;
        jit_word_t       length;
+        /* PROTECTED bytes starting at PTR are mprotect'd. */
+        jit_word_t       protected;
     } code;
     struct {
        jit_uint8_t     *ptr;
     } code;
     struct {
        jit_uint8_t     *ptr;
index ad3f76f..bf59c5b 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index a28b0dd..d51cfec 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -65,4 +65,13 @@ typedef enum {
 #define JIT_NOREG              _NOREG
 } jit_reg_t;
 
 #define JIT_NOREG              _NOREG
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t flogr         : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_s390_h */
 #endif /* _jit_s390_h */
index e5988e1..ec21be9 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -99,4 +99,13 @@ typedef enum {
     _NOREG,
 } jit_reg_t;
 
     _NOREG,
 } jit_reg_t;
 
+typedef struct {
+    jit_uint32_t lzcnt         : 1;
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_sparc_h */
 #endif /* _jit_sparc_h */
index 91f9124..4c48013 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -189,6 +189,10 @@ typedef struct {
     jit_uint32_t avx           : 1;
     /* lahf/sahf available in 64 bits mode */
     jit_uint32_t lahf          : 1;
     jit_uint32_t avx           : 1;
     /* lahf/sahf available in 64 bits mode */
     jit_uint32_t lahf          : 1;
+    /* lzcnt and tzcnt? */
+    jit_uint32_t abm           : 1;
+    /* adcx and adox instructions available? */
+    jit_uint32_t adx           : 1;
 } jit_cpu_t;
 
 /*
 } jit_cpu_t;
 
 /*
index a30e7fd..44ac4f2 100644 (file)
 AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include  \
        -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 liblightning_LTLIBRARIES = liblightning.la
 AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include  \
        -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 liblightning_LTLIBRARIES = liblightning.la
-liblightning_la_LDFLAGS = -version-info 1:0:0
+liblightning_la_LDFLAGS = -version-info 2:0:0
 
 
+AM_CPPFLAGS =
 if get_jit_size
 JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
 if get_jit_size
 JIT_SIZE_PATH = "$(top_builddir)/jit_$(cpu)-sz.c"
-AM_CPPFLAGS=-DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+AM_CPPFLAGS += -DGET_JIT_SIZE=1 -DJIT_SIZE_PATH='$(JIT_SIZE_PATH)'
+endif
+if strong_type_checking
+AM_CPPFLAGS += -DSTRONG_TYPE_CHECKING=1
 endif
 
 liblightningdir = $(libdir)
 liblightning_la_SOURCES =      \
        jit_disasm.c            \
        jit_memory.c            \
 endif
 
 liblightningdir = $(libdir)
 liblightning_la_SOURCES =      \
        jit_disasm.c            \
        jit_memory.c            \
-       jit_names.c             \
        jit_note.c              \
        jit_print.c             \
        jit_size.c              \
        lightning.c
 
 EXTRA_DIST =                   \
        jit_note.c              \
        jit_print.c             \
        jit_size.c              \
        lightning.c
 
 EXTRA_DIST =                   \
+       jit_names.c             \
        jit_fallback.c          \
        jit_rewind.c            \
        jit_fallback.c          \
        jit_rewind.c            \
+       aarch64-logical-immediates.c    \
        jit_aarch64.c           \
        jit_aarch64-cpu.c       \
        jit_aarch64-fpu.c       \
        jit_aarch64.c           \
        jit_aarch64-cpu.c       \
        jit_aarch64-fpu.c       \
diff --git a/deps/lightning/lib/aarch64-logical-immediates.c b/deps/lightning/lib/aarch64-logical-immediates.c
new file mode 100644 (file)
index 0000000..c1e1ab0
--- /dev/null
@@ -0,0 +1,161 @@
+// AArch64 Logical Immediate Encoding and Decoding
+//
+// I hereby place this code in the public domain, as per the terms of the
+// CC0 license: https://creativecommons.org/publicdomain/zero/1.0/
+
+#include <stdint.h>
+#include <stdbool.h>
+
+static inline int nonzeroCountTrailingZeros64(uint64_t n) {  // number of trailing zero bits in n; n must be nonzero
+    return __builtin_ctzll(n);  // compiler builtin; its result is undefined for 0
+}
+
+static inline int countTrailingZeros64(uint64_t n) {  // number of trailing zero bits in n; returns 64 when n == 0
+    return n ? nonzeroCountTrailingZeros64(n) : 64;  // guard the zero case, which the builtin does not define
+}
+
+static inline int nonzeroCountLeadingZeros64(uint64_t n) {  // number of leading zero bits in n; n must be nonzero
+    return __builtin_clzll(n);  // compiler builtin; its result is undefined for 0
+}
+
+static inline int nonzeroCountLeadingZeros32(uint32_t n) {  // number of leading zero bits in the 32-bit n; n must be nonzero
+    return __builtin_clz(n);  // compiler builtin; its result is undefined for 0
+}
+
+static inline uint64_t rotateRight64(uint64_t v, int n) {  // rotate v right by n bits; only the low 6 bits of n are used
+    // return __builtin_rotateright64(v, n);
+    return (v >> (n & 63)) | (v << (-n & 63));  // both shift counts are masked to 0..63, so n == 0 never shifts by 64
+}
+
+static inline uint64_t clearTrailingOnes64(uint64_t n) {  // clear the run of consecutive one bits that starts at bit 0
+    return n & (n + 1);  // n + 1 carries through the trailing ones, zeroing them in the AND
+}
+
+#define ENCODE_FAILED (-1)
+
+int encodeLogicalImmediate64(uint64_t val) {  // returns (N << 12) | (immr << 6) | imms, or ENCODE_FAILED if val is not encodable
+    // Consider an ARM64 logical immediate as a pattern of "o" ones preceded
+    // by "z" more-significant zeroes, repeated to fill a 64-bit integer.
+    // o > 0, z > 0, and the size (o + z) is a power of two in [2,64]. This
+    // part of the pattern is encoded in the fields "imms" and "N".
+    //
+    // "immr" encodes a further right rotate of the repeated pattern, allowing
+    // a wide range of useful bitwise constants to be represented.
+    //
+    // (The spec describes the "immr" rotate as rotating the "o + z" bit
+    // pattern before repeating it to fill 64-bits, but, as it's a repeating
+    // pattern, rotating afterwards is equivalent.)
+
+    // This encoding is not allowed to represent all-zero or all-one values.
+    if (val == 0 || ~val == 0)
+        return ENCODE_FAILED;
+
+    // To detect an immediate that may be encoded in this scheme, we first
+    // remove the right-rotate, by rotating such that the least significant
+    // bit is a one and the most significant bit is a zero.
+    //
+    // We do this by clearing any trailing one bits, then counting the
+    // trailing zeroes. This finds an "edge", where zero goes to one.
+    // We then rotate the original value right by that amount, moving
+    // the first one to the least significant bit.
+
+    int rotation = countTrailingZeros64(clearTrailingOnes64(val));  // 64 when val is a single run of ones starting at bit 0
+    uint64_t normalized = rotateRight64(val, rotation & 63);  // & 63 turns that 64 into "no rotation"
+
+    // Now we have normalized the value, and determined the rotation, we can
+    // determine "z" by counting the leading zeroes, and "o" by counting the
+    // trailing ones. (These will both be positive, as we already rejected 0
+    // and ~0, and rotated the value to start with a zero and end with a one.)
+
+    int zeroes = nonzeroCountLeadingZeros64(normalized);
+    int ones = nonzeroCountTrailingZeros64(~normalized);  // trailing ones of normalized == trailing zeroes of its complement
+    int size = zeroes + ones;
+
+    // Detect the repeating pattern (by comparing every repetition to the
+    // one next to it, using rotate).
+
+    if (rotateRight64(val, size & 63) != val)  // size == 64 rotates by 0 and always passes
+        return ENCODE_FAILED;
+
+    // We do not need to further validate size to ensure it is a power of two
+    // between 2 and 64. The only "minimal" patterns that can repeat to fill a
+    // 64-bit value must have a length that is a factor of 64 (i.e. it is a
+    // power of two in the range [1,64]). And our pattern cannot be of length
+    // one (as we already rejected 0 and ~0).
+    //
+    // By "minimal" patterns I refer to patterns which do not themselves
+    // contain repetitions. For example, '010101' is a non-minimal pattern of
+    // a non-power-of-two length that can pass the above rotational test. It
+    // consists of the minimal pattern '01'. All our patterns are minimal, as
+    // they contain only one contiguous run of ones separated by at least one
+    // zero.
+
+    // Finally, we encode the values. "rotation" is the amount we rotated
+    // right by to "undo" the right-rotate encoded in immr, so must be
+    // negated.
+
+    // size 2:  N=0 immr=00000r imms=11110s
+    // size 4:  N=0 immr=0000rr imms=1110ss
+    // size 8:  N=0 immr=000rrr imms=110sss
+    // size 16: N=0 immr=00rrrr imms=10ssss
+    // size 32: N=0 immr=0rrrrr imms=0sssss
+    // size 64: N=1 immr=rrrrrr imms=ssssss
+    int immr = -rotation & (size - 1);  // rotation count reduced modulo the element size
+    int imms = -(size << 1) | (ones - 1);  // upper bits mark the size, lower bits hold ones - 1; masked to 6 bits below
+    int N = (size >> 6);  // 1 only when size == 64
+
+    return (N << 12) | (immr << 6) | (imms & 0x3f);  // 13-bit N:immr:imms layout
+}
+
+int encodeLogicalImmediate32(uint32_t val) {  // 32-bit variant; same return convention as encodeLogicalImmediate64
+    return encodeLogicalImmediate64(((uint64_t)val << 32) | val);  // replicate val into both halves and reuse the 64-bit encoder
+}
+
+// Decoding!
+
+bool isValidLogicalImmediate64(unsigned val) {  // val uses the N:immr:imms layout returned by encodeLogicalImmediate64
+    unsigned N = (val >> 12) & 1;  // bit 12
+    unsigned imms = val & 0x3f;  // bits 5..0
+    unsigned pattern = (N << 6) | (~imms & 0x3f);  // 7-bit value: N followed by the inverted imms
+    return (pattern & (pattern - 1)) != 0;  // true iff pattern has at least two bits set (not zero, not a power of two)
+}
+
+bool isValidLogicalImmediate32(unsigned val) {  // 32-bit variant: additionally requires the N bit to be clear
+    unsigned N = (val >> 12) & 1;  // bit 12
+    return N == 0 && isValidLogicalImmediate64(val);
+}
+
+#define DECODE_FAILED 0
+
+// returns DECODE_FAILED (zero) if the encoding is invalid
+uint64_t decodeLogicalImmediate64(unsigned val) {  // val uses the N:immr:imms layout returned by encodeLogicalImmediate64
+    // Fun way to generate the immediates with mask ^ (mask << S)
+    static const uint64_t mask_lookup[] = {
+        0xffffffffffffffff, // size = 64
+        0x00000000ffffffff, // size = 32
+        0x0000ffff0000ffff, // size = 16
+        0x00ff00ff00ff00ff, // size = 8
+        0x0f0f0f0f0f0f0f0f, // size = 4
+        0x3333333333333333, // size = 2
+    };
+
+    unsigned N = (val >> 12) & 1;  // bit 12
+    int immr = (val >> 6) & 0x3f;  // bits 11..6: right-rotate amount
+    unsigned imms = val & 0x3f;  // bits 5..0: size marker plus (ones - 1)
+
+    unsigned pattern = (N << 6) | (~imms & 0x3f);  // 7-bit value: N followed by the inverted imms
+
+    if (!(pattern & (pattern - 1))) return DECODE_FAILED;  // same validity test as isValidLogicalImmediate64
+
+    int leading_zeroes = nonzeroCountLeadingZeros32(pattern);  // 25 (size 64) through 30 (size 2) for a valid pattern
+    unsigned imms_mask = 0x7fffffff >> leading_zeroes;  // size - 1
+    uint64_t mask = mask_lookup[leading_zeroes - 25];  // index 0..5, matching the table order above
+    unsigned S = (imms + 1) & imms_mask;  // number of one bits per element
+    return rotateRight64(mask ^ (mask << S), immr);  // XOR leaves S low ones in each element, then apply the rotation
+}
+
+uint32_t decodeLogicalImmediate32(unsigned val) {  // 32-bit variant; returns DECODE_FAILED (zero) if the encoding is invalid
+    unsigned N = (val >> 12) & 1;  // bit 12
+    if (N) return DECODE_FAILED;  // an encoding with N set is rejected for 32-bit use
+    return (uint32_t)decodeLogicalImmediate64(val);  // keep the low 32 bits of the 64-bit decode
+}
index 35ddabf..d5e64ad 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -210,7 +210,7 @@ typedef union {
     jit_int32_t                w;
 #  undef ui
 } instr_t;
     jit_int32_t                w;
 #  undef ui
 } instr_t;
-#  define stack_framesize              160
+#  define s26_p(d)                     ((d) >= -33554432 && (d) <= 33554431)
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  define ldr(r0,r1)                   ldr_l(r0,r1)
 #  define ldxr(r0,r1,r2)               ldxr_l(r0,r1,r2)
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  define ldr(r0,r1)                   ldr_l(r0,r1)
 #  define ldxr(r0,r1,r2)               ldxr_l(r0,r1,r2)
@@ -349,6 +349,9 @@ typedef union {
 #  define A64_ORR                      0x2a000000
 #  define A64_MOV                      0x2a0003e0      /* AKA orr Rd,xzr,Rm */
 #  define A64_MVN                      0x2a2003e0
 #  define A64_ORR                      0x2a000000
 #  define A64_MOV                      0x2a0003e0      /* AKA orr Rd,xzr,Rm */
 #  define A64_MVN                      0x2a2003e0
+#  define A64_CLS                      0x5ac01400
+#  define A64_CLZ                      0x5ac01000
+#  define A64_RBIT                     0x5ac00000
 #  define A64_UXTW                     0x2a0003e0      /* AKA MOV */
 #  define A64_EOR                      0x4a000000
 #  define A64_ANDS                     0x6a000000
 #  define A64_UXTW                     0x2a0003e0      /* AKA MOV */
 #  define A64_EOR                      0x4a000000
 #  define A64_ANDS                     0x6a000000
@@ -370,6 +373,9 @@ typedef union {
 #  define MOV(Rd,Rm)                   ox_x(A64_MOV|XS,Rd,Rm)
 #  define MVN(Rd,Rm)                   ox_x(A64_MVN|XS,Rd,Rm)
 #  define NEG(Rd,Rm)                   ox_x(A64_NEG|XS,Rd,Rm)
 #  define MOV(Rd,Rm)                   ox_x(A64_MOV|XS,Rd,Rm)
 #  define MVN(Rd,Rm)                   ox_x(A64_MVN|XS,Rd,Rm)
 #  define NEG(Rd,Rm)                   ox_x(A64_NEG|XS,Rd,Rm)
+#  define CLS(Rd,Rm)                   o_xx(A64_CLS|XS,Rd,Rm)
+#  define CLZ(Rd,Rm)                   o_xx(A64_CLZ|XS,Rd,Rm)
+#  define RBIT(Rd,Rm)                  o_xx(A64_RBIT|XS,Rd,Rm)
 #  define MOVN(Rd,Imm16)               ox_h(A64_MOVN|XS,Rd,Imm16)
 #  define MOVN_16(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
 #  define MOVN_32(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
 #  define MOVN(Rd,Imm16)               ox_h(A64_MOVN|XS,Rd,Imm16)
 #  define MOVN_16(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
 #  define MOVN_32(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
@@ -584,6 +590,14 @@ static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  MVN(r0,r1)
 static void _movzr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  MVN(r0,r1)
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 CLZ(r0,r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define andr(r0,r1,r2)               AND(r0,r1,r2)
 #  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define andr(r0,r1,r2)               AND(r0,r1,r2)
 #  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
 static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -781,12 +795,12 @@ _bmxi(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
 #  define bmci(i0,r0,i1)               bmxi(BCC_EQ,i0,r0,i1)
 #  define jmpr(r0)                     BR(r0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
 #  define bmci(i0,r0,i1)               bmxi(BCC_EQ,i0,r0,i1)
 #  define jmpr(r0)                     BR(r0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    BLR(r0)
 #  define calli(i0)                    _calli(_jit,i0)
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    BLR(r0)
 #  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
@@ -802,36 +816,17 @@ static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
 #endif
 
 #if CODE
 #endif
 
 #if CODE
+/* https://dougallj.wordpress.com/2021/10/30/bit-twiddling-optimising-aarch64-logical-immediate-encoding-and-decoding/ */
+#include "aarch64-logical-immediates.c"
 static jit_int32_t
 logical_immediate(jit_word_t imm)
 {
 static jit_int32_t
 logical_immediate(jit_word_t imm)
 {
-    /* There are 5334 possible immediate values, but to avoid the
-     * need of either too complex code or large lookup tables,
-     * only check for (simply) encodable common/small values */
-    switch (imm) {
-       case -16:       return (0xf3b);
-       case -15:       return (0xf3c);
-       case -13:       return (0xf3d);
-       case -9:        return (0xf3e);
-       case -8:        return (0xf7c);
-       case -7:        return (0xf7d);
-       case -5:        return (0xf7e);
-       case -4:        return (0xfbd);
-       case -3:        return (0xfbe);
-       case -2:        return (0xffe);
-       case 1:         return (0x000);
-       case 2:         return (0xfc0);
-       case 3:         return (0x001);
-       case 4:         return (0xf80);
-       case 6:         return (0xfc1);
-       case 7:         return (0x002);
-       case 8:         return (0xf40);
-       case 12:        return (0xf81);
-       case 14:        return (0xfc2);
-       case 15:        return (0x003);
-       case 16:        return (0xf00);
-       default:        return (-1);
+    jit_int32_t                result = encodeLogicalImmediate64(imm);
+    if (result != ENCODE_FAILED) {
+       assert(isValidLogicalImmediate64(result));
+       return (result & 0xfff);
     }
     }
+    return (-1);
 }
 
 static void
 }
 
 static void
@@ -912,7 +907,7 @@ static void
 _o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
 {
     instr_t    i;
 _o26(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Simm26)
 {
     instr_t    i;
-    assert(Simm26 >= -33554432 && Simm26 <= 33554431);
+    assert(s26_p(Simm26));
     assert(!(Op   & ~0xfc000000));
     i.w = Op;
     i.imm26.b = Simm26;
     assert(!(Op   & ~0xfc000000));
     i.w = Op;
     i.imm26.b = Simm26;
@@ -1398,6 +1393,27 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        CSEL(r0, r0, r1, CC_EQ);
 }
 
        CSEL(r0, r0, r1, CC_EQ);
 }
 
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* clor: emitted as comr followed by clzr */
+{
+    comr(r0, r1);  /* comr expands to MVN: r0 = ~r1 */
+    clzr(r0, r0);  /* clzr expands to CLZ on the complement, i.e. the leading-ones count of r1 */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* ctor: emitted as RBIT followed by clor */
+{
+    RBIT(r0, r1);  /* r0 = r1 with its bit order reversed */
+    clor(r0, r0);  /* leading ones of the reversed value correspond to trailing ones of r1 */
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)  /* ctzr: emitted as RBIT followed by clzr */
+{
+    RBIT(r0, r1);  /* r0 = r1 with its bit order reversed */
+    clzr(r0, r0);  /* leading zeros of the reversed value correspond to trailing zeros of r1 */
+}
+
 static void
 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 static void
 _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -1850,7 +1866,7 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     retry = _jit->pc.w;
     LDAXR(r0, r1);
     eqr(r0, r0, r2);
     retry = _jit->pc.w;
     LDAXR(r0, r1);
     eqr(r0, r0, r2);
-    jump0 = beqi(_jit->pc.w r0, 0);    /* beqi done r0 0 */
+    jump0 = beqi(_jit->pc.w, r0, 0);   /* beqi done r0 0 */
     STLXR(r3, r0, r1);
     jump1 = bnei(_jit->pc.w, r0, 0);   /* bnei retry r0 0 */
     /* done: */
     STLXR(r3, r0, r1);
     jump1 = bnei(_jit->pc.w, r0, 0);   /* bnei retry r0 0 */
     /* done: */
@@ -2166,20 +2182,22 @@ _bmxi(jit_state_t *_jit, jit_int32_t cc,
     return (w);
 }
 
     return (w);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
     jit_int32_t                reg;
     jit_int32_t                reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (w >= -33554432 && w <= 33554431)
-       B(w);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (s26_p(d))
+       B(d);
     else {
        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
        movi(rn(reg), i0);
        jmpr(rn(reg));
        jit_unget_reg(reg);
     }
     else {
        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
        movi(rn(reg), i0);
        jmpr(rn(reg));
        jit_unget_reg(reg);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2194,20 +2212,22 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
     jit_int32_t                reg;
     jit_int32_t                reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (w >= -33554432 && w <= 33554431)
-       BL(w);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (s26_p(d))
+       BL(d);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        callr(rn(reg));
        jit_unget_reg(reg);
     }
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        callr(rn(reg));
        jit_unget_reg(reg);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2222,20 +2242,13 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
-/*
- * prolog and epilog not as "optimized" as one would like, but the
- * problem of overallocating stack space to save callee save registers
- * exists on all ports, and is still a todo to use a variable
- *     stack_framesize
- * value, what would cause needing to patch some calls, most likely
- * the offset of jit_arg* of stack arguments.
- */
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, rreg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -2246,40 +2259,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
-    STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
-    MOV_XSP(FP_REGNO, SP_REGNO);
-#define SPILL(L, R, O)                                                 \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
-           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
-               STPI(L, R, SP_REGNO, O);                                \
-           else                                                        \
-               STRI(L, SP_REGNO, O);                                   \
-       }                                                               \
-       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
-           STRI(R, SP_REGNO, O + 1);                                   \
-    } while (0)
-    SPILL(19, 20,  2);
-    SPILL(21, 22,  4);
-    SPILL(23, 24,  6);
-    SPILL(25, 26,  8);
-    SPILL(27, 28, 10);
-#undef SPILL
-#define SPILL(R, O)                                                    \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
-               stxi_d(O, SP_REGNO, R);                                 \
-    } while (0)
-    SPILL( 8,  96);
-    SPILL( 9, 104);
-    SPILL(10, 112);
-    SPILL(11, 120);
-    SPILL(12, 128);
-    SPILL(13, 136);
-    SPILL(14, 144);
-    SPILL(15, 152);
-#undef SPILL
-    if (_jitc->function->stack)
+
+    if (!_jitc->function->need_frame) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               jit_check_frame();
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame) {
+       STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(jit_framesize() >> 3));
+       MOV_XSP(FP_REGNO, SP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = 2; reg < jit_size(iregs);) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {
+               if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
+                   break;
+           }
+           if (rreg < jit_size(iregs)) {
+               STPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);
+               offs += 2;
+               reg = rreg + 1;
+           }
+           else {
+               STRI(rn(iregs[reg]), SP_REGNO, offs);
+               ++offs;
+               /* No pair found */
+               break;
+           }
+       }
+       else
+           ++reg;
+    }
+    for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+  if (_jitc->function->stack)
        subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
        subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
@@ -2288,6 +2312,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
        jit_unget_reg(reg);
     }
 
        jit_unget_reg(reg);
     }
 
+#if !__APPLE__
     if (_jitc->function->self.call & jit_call_varargs) {
        /* Save gp registers in the save area, if any is a vararg */
        for (reg = 8 - _jitc->function->vagp / -8;
     if (_jitc->function->self.call & jit_call_varargs) {
        /* Save gp registers in the save area, if any is a vararg */
        for (reg = 8 - _jitc->function->vagp / -8;
@@ -2305,53 +2330,55 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
                   reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
     }
            stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
                   reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
     }
+#endif
 }
 
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
 }
 
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, rreg, offs;
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->stack)
        MOV_XSP(SP_REGNO, FP_REGNO);
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->stack)
        MOV_XSP(SP_REGNO, FP_REGNO);
-#define LOAD(L, R, O)                                                  \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
-           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
-               LDPI(L, R, SP_REGNO, O);                                \
-           else                                                        \
-               LDRI(L, SP_REGNO, O);                                   \
-       }                                                               \
-       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
-           LDRI(R, SP_REGNO, O + 1);                                   \
-    } while (0)
-    LOAD(19, 20,  2);
-    LOAD(21, 22,  4);
-    LOAD(23, 24,  6);
-    LOAD(25, 26,  8);
-    LOAD(27, 28, 10);
-#undef LOAD
-#define LOAD(R, O)                                                     \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
-               ldxi_d(R, SP_REGNO, O);                                 \
-    } while (0)
-    LOAD( 8,  96);
-    LOAD( 9, 104);
-    LOAD(10, 112);
-    LOAD(11, 120);
-    LOAD(12, 128);
-    LOAD(13, 136);
-    LOAD(14, 144);
-    LOAD(15, 152);
-#undef LOAD
-    LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
+    /* callee save registers */
+    for (reg = 0, offs = 2; reg < jit_size(iregs);) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           for (rreg = reg + 1; rreg < jit_size(iregs); rreg++) {
+               if (jit_regset_tstbit(&_jitc->function->regset, iregs[rreg]))
+                   break;
+           }
+           if (rreg < jit_size(iregs)) {
+               LDPI(rn(iregs[reg]), rn(iregs[rreg]), SP_REGNO, offs);
+               offs += 2;
+               reg = rreg + 1;
+           }
+           else {
+               LDRI(rn(iregs[reg]), SP_REGNO, offs);
+               ++offs;
+               /* No pair found */
+               break;
+           }
+       }
+       else
+           ++reg;
+    }
+    for (reg = 0, offs <<= 3; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, jit_framesize() >> 3);
     RET();
 }
 
 static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
     RET();
 }
 
 static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
+#if !__APPLE__
     jit_int32_t                reg;
 
     assert(_jitc->function->self.call & jit_call_varargs);
     jit_int32_t                reg;
 
     assert(_jitc->function->self.call & jit_call_varargs);
@@ -2362,7 +2389,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     reg = jit_get_reg(jit_class_gpr);
 
     /* Initialize stack pointer to the first stack argument. */
     reg = jit_get_reg(jit_class_gpr);
 
     /* Initialize stack pointer to the first stack argument. */
-    addi(rn(reg), FP_REGNO, _jitc->function->self.size);
+    addi(rn(reg), FP_REGNO, jit_selfsize());
     stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
 
     /* Initialize gp top pointer to the first stack argument. */
     stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
 
     /* Initialize gp top pointer to the first stack argument. */
@@ -2382,11 +2409,16 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
 
     jit_unget_reg(reg);
     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
 
     jit_unget_reg(reg);
+#else
+    assert(_jitc->function->self.call & jit_call_varargs);
+    addi(r0, FP_REGNO, jit_selfsize());
+#endif
 }
 
 static void
 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static void
 _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if !__APPLE__
     jit_word_t         ge_code;
     jit_word_t         lt_code;
     jit_int32_t                rg0, rg1;
     jit_word_t         ge_code;
     jit_word_t         lt_code;
     jit_int32_t                rg0, rg1;
@@ -2416,7 +2448,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    lt_code = jmpi_p(_jit->pc.w);
+    lt_code = jmpi(_jit->pc.w);
 
     /* Where to land if argument is in overflow area. */
     patch_at(ge_code, _jit->pc.w);
 
     /* Where to land if argument is in overflow area. */
     patch_at(ge_code, _jit->pc.w);
@@ -2435,6 +2467,11 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
     patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
+#else
+    assert(_jitc->function->self.call & jit_call_varargs);
+    ldr(r0, r1);
+    addi(r1, r1, sizeof(jit_word_t));
+#endif
 }
 
 static void
 }
 
 static void
@@ -2454,7 +2491,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
     ffc = i.w & 0xffc00000;
     if (fc == A64_B || fc == A64_BL) {
        d = (label - instr) >> 2;
     ffc = i.w & 0xffc00000;
     if (fc == A64_B || fc == A64_BL) {
        d = (label - instr) >> 2;
-       assert(d >= -33554432 && d <= 33554431);
+       assert(s26_p(d));
        i.imm26.b = d;
        u.i[0] = i.w;
     }
        i.imm26.b = d;
        u.i[0] = i.w;
     }
index 7c40539..3d17e32 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -862,6 +862,7 @@ dbopi(ltgt)
 static void
 _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if !__APPLE__
     jit_word_t         ge_code;
     jit_word_t         lt_code;
     jit_int32_t                rg0, rg1;
     jit_word_t         ge_code;
     jit_word_t         lt_code;
     jit_int32_t                rg0, rg1;
@@ -891,7 +892,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    lt_code = jmpi_p(_jit->pc.w);
+    lt_code = jmpi(_jit->pc.w);
 
     /* Where to land if argument is in overflow area. */
     patch_at(ge_code, _jit->pc.w);
 
     /* Where to land if argument is in overflow area. */
     patch_at(ge_code, _jit->pc.w);
@@ -910,5 +911,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
     patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
+#else
+    assert(_jitc->function->self.call & jit_call_varargs);
+    ldr_d(r0, r1);
+    addi(r1, r1, sizeof(jit_float64_t));
+#endif
 }
 #endif
 }
 #endif
index b1f451f..3d1ea99 100644 (file)
@@ -1,20 +1,25 @@
 
 #if __WORDSIZE == 64
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 120
+#  if PACKED_STACK
+#define JIT_INSTR_MAX 96
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    4, /* align */
+    12,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    120,       /* prolog */
+    96,        /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    44,        /* va_start */
-    64,        /* va_arg */
-    72,        /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    12,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     20,        /* addi */
     0, /* va_end */
     4, /* addr */
     20,        /* addi */
     16,        /* movi */
     8, /* movnr */
     8, /* movzr */
     16,        /* movi */
     8, /* movnr */
     8, /* movzr */
+    28,        /* casr */
+    36,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
-    12,        /* ldi_c */
+    16,        /* ldi_c */
     4, /* ldr_uc */
     4, /* ldr_uc */
-    12,        /* ldi_uc */
+    16,        /* ldi_uc */
     4, /* ldr_s */
     4, /* ldr_s */
-    12,        /* ldi_s */
+    16,        /* ldi_s */
     4, /* ldr_us */
     4, /* ldr_us */
-    12,        /* ldi_us */
+    16,        /* ldi_us */
     4, /* ldr_i */
     4, /* ldr_i */
-    12,        /* ldi_i */
+    16,        /* ldi_i */
     4, /* ldr_ui */
     4, /* ldr_ui */
-    12,        /* ldi_ui */
+    16,        /* ldi_ui */
     4, /* ldr_l */
     4, /* ldr_l */
-    12,        /* ldi_l */
+    16,        /* ldi_l */
     8, /* ldxr_c */
     20,        /* ldxi_c */
     4, /* ldxr_uc */
     8, /* ldxr_c */
     20,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_l */
     20,        /* ldxi_l */
     4, /* str_c */
     4, /* ldxr_l */
     20,        /* ldxi_l */
     4, /* str_c */
-    12,        /* sti_c */
+    16,        /* sti_c */
     4, /* str_s */
     4, /* str_s */
-    12,        /* sti_s */
+    16,        /* sti_s */
     4, /* str_i */
     4, /* str_i */
-    12,        /* sti_i */
+    16,        /* sti_i */
     4, /* str_l */
     4, /* str_l */
-    12,        /* sti_l */
+    16,        /* sti_l */
     4, /* stxr_c */
     20,        /* stxi_c */
     4, /* stxr_s */
     4, /* stxr_c */
     20,        /* stxi_c */
     4, /* stxr_s */
     8, /* bxsubr_u */
     8, /* bxsubi_u */
     4, /* jmpr */
     8, /* bxsubr_u */
     8, /* bxsubi_u */
     4, /* jmpr */
-    20,        /* jmpi */
+    4, /* jmpi */
     4, /* callr */
     4, /* callr */
-    20,        /* calli */
+    16,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* movr_f */
     8, /* movi_f */
     8, /* ldr_f */
     4, /* movr_f */
     8, /* movi_f */
     8, /* ldr_f */
-    16,        /* ldi_f */
+    20,        /* ldi_f */
     8, /* ldxr_f */
     24,        /* ldxi_f */
     8, /* str_f */
     8, /* ldxr_f */
     24,        /* ldxi_f */
     8, /* str_f */
-    16,        /* sti_f */
+    20,        /* sti_f */
     8, /* stxr_f */
     24,        /* stxi_f */
     8, /* bltr_f */
     8, /* stxr_f */
     24,        /* stxi_f */
     8, /* bltr_f */
     4, /* movr_d */
     12,        /* movi_d */
     8, /* ldr_d */
     4, /* movr_d */
     12,        /* movi_d */
     8, /* ldr_d */
-    16,        /* ldi_d */
+    20,        /* ldi_d */
     8, /* ldxr_d */
     24,        /* ldxi_d */
     8, /* str_d */
     8, /* ldxr_d */
     24,        /* ldxi_d */
     8, /* str_d */
-    16,        /* sti_d */
+    20,        /* sti_d */
     8, /* stxr_d */
     24,        /* stxi_d */
     8, /* bltr_d */
     8, /* stxr_d */
     24,        /* stxi_d */
     8, /* bltr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
+    8, /* clo */
+    4, /* clz */
+    12, /* cto */
+    8, /* ctz */
+
+#  else        /* PACKED_STACK */
+#define JIT_INSTR_MAX 120
+    0, /* data */
+    0, /* live */
+    12,        /* align */
+    0, /* save */
+    0, /* load */
+    4, /* skip */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    120,       /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    12,        /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    20,        /* addi */
+    4, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    8, /* addxi */
+    4, /* subr */
+    20,        /* subi */
+    4, /* subcr */
+    12,        /* subci */
+    4, /* subxr */
+    8, /* subxi */
+    24,        /* rsbi */
+    4, /* mulr */
+    20,        /* muli */
+    12,        /* qmulr */
+    20,        /* qmuli */
+    12,        /* qmulr_u */
+    20,        /* qmuli_u */
+    4, /* divr */
+    20,        /* divi */
+    4, /* divr_u */
+    12,        /* divi_u */
+    20,        /* qdivr */
+    16,        /* qdivi */
+    20,        /* qdivr_u */
+    16,        /* qdivi_u */
+    12,        /* remr */
+    28,        /* remi */
+    12,        /* remr_u */
+    20,        /* remi_u */
+    4, /* andr */
+    20,        /* andi */
+    4, /* orr */
+    20,        /* ori */
+    4, /* xorr */
+    20,        /* xori */
+    4, /* lshr */
+    4, /* lshi */
+    4, /* rshr */
+    4, /* rshi */
+    4, /* rshr_u */
+    4, /* rshi_u */
+    4, /* negr */
+    4, /* comr */
+    8, /* ltr */
+    8, /* lti */
+    8, /* ltr_u */
+    8, /* lti_u */
+    8, /* ler */
+    8, /* lei */
+    8, /* ler_u */
+    8, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
+    8, /* ger */
+    8, /* gei */
+    8, /* ger_u */
+    8, /* gei_u */
+    8, /* gtr */
+    8, /* gti */
+    8, /* gtr_u */
+    8, /* gti_u */
+    8, /* ner */
+    8, /* nei */
+    4, /* movr */
+    16,        /* movi */
+    8, /* movnr */
+    8, /* movzr */
+    28,        /* casr */
+    36,        /* casi */
+    4, /* extr_c */
+    4, /* extr_uc */
+    4, /* extr_s */
+    4, /* extr_us */
+    4, /* extr_i */
+    4, /* extr_ui */
     8, /* bswapr_us */
     8, /* bswapr_ui */
     4, /* bswapr_ul */
     8, /* bswapr_us */
     8, /* bswapr_ui */
     4, /* bswapr_ul */
-    28,        /* casr */
-    36,        /* casi */
+    8, /* htonr_us */
+    8, /* htonr_ui */
+    4, /* htonr_ul */
+    4, /* ldr_c */
+    16,        /* ldi_c */
+    4, /* ldr_uc */
+    16,        /* ldi_uc */
+    4, /* ldr_s */
+    16,        /* ldi_s */
+    4, /* ldr_us */
+    16,        /* ldi_us */
+    4, /* ldr_i */
+    16,        /* ldi_i */
+    4, /* ldr_ui */
+    16,        /* ldi_ui */
+    4, /* ldr_l */
+    16,        /* ldi_l */
+    8, /* ldxr_c */
+    20,        /* ldxi_c */
+    4, /* ldxr_uc */
+    20,        /* ldxi_uc */
+    4, /* ldxr_s */
+    16,        /* ldxi_s */
+    4, /* ldxr_us */
+    16,        /* ldxi_us */
+    4, /* ldxr_i */
+    20,        /* ldxi_i */
+    4, /* ldxr_ui */
+    16,        /* ldxi_ui */
+    4, /* ldxr_l */
+    20,        /* ldxi_l */
+    4, /* str_c */
+    16,        /* sti_c */
+    4, /* str_s */
+    16,        /* sti_s */
+    4, /* str_i */
+    16,        /* sti_i */
+    4, /* str_l */
+    16,        /* sti_l */
+    4, /* stxr_c */
+    20,        /* stxi_c */
+    4, /* stxr_s */
+    20,        /* stxi_s */
+    4, /* stxr_i */
+    20,        /* stxi_i */
+    4, /* stxr_l */
+    20,        /* stxi_l */
+    8, /* bltr */
+    8, /* blti */
+    8, /* bltr_u */
+    8, /* blti_u */
+    8, /* bler */
+    8, /* blei */
+    8, /* bler_u */
+    8, /* blei_u */
+    8, /* beqr */
+    24,        /* beqi */
+    8, /* bger */
+    8, /* bgei */
+    8, /* bger_u */
+    8, /* bgei_u */
+    8, /* bgtr */
+    8, /* bgti */
+    8, /* bgtr_u */
+    8, /* bgti_u */
+    8, /* bner */
+    24,        /* bnei */
+    8, /* bmsr */
+    8, /* bmsi */
+    8, /* bmcr */
+    8, /* bmci */
+    8, /* boaddr */
+    8, /* boaddi */
+    8, /* boaddr_u */
+    8, /* boaddi_u */
+    8, /* bxaddr */
+    8, /* bxaddi */
+    8, /* bxaddr_u */
+    8, /* bxaddi_u */
+    8, /* bosubr */
+    8, /* bosubi */
+    8, /* bosubr_u */
+    8, /* bosubi_u */
+    8, /* bxsubr */
+    8, /* bxsubi */
+    8, /* bxsubr_u */
+    8, /* bxsubi_u */
+    4, /* jmpr */
+    4, /* jmpi */
+    4, /* callr */
+    16,        /* calli */
+    0, /* prepare */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    96,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    12,        /* addi_f */
+    4, /* subr_f */
+    12,        /* subi_f */
+    12,        /* rsbi_f */
+    4, /* mulr_f */
+    12,        /* muli_f */
+    4, /* divr_f */
+    12,        /* divi_f */
+    4, /* negr_f */
+    4, /* absr_f */
+    4, /* sqrtr_f */
+    8, /* ltr_f */
+    16,        /* lti_f */
+    8, /* ler_f */
+    16,        /* lei_f */
+    8, /* eqr_f */
+    16,        /* eqi_f */
+    8, /* ger_f */
+    16,        /* gei_f */
+    8, /* gtr_f */
+    16,        /* gti_f */
+    8, /* ner_f */
+    16,        /* nei_f */
+    8, /* unltr_f */
+    16,        /* unlti_f */
+    8, /* unler_f */
+    16,        /* unlei_f */
+    16,        /* uneqr_f */
+    24,        /* uneqi_f */
+    8, /* unger_f */
+    16,        /* ungei_f */
+    8, /* ungtr_f */
+    16,        /* ungti_f */
+    16,        /* ltgtr_f */
+    24,        /* ltgti_f */
+    8, /* ordr_f */
+    16,        /* ordi_f */
+    8, /* unordr_f */
+    16,        /* unordi_f */
+    8, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    8, /* movi_f */
+    8, /* ldr_f */
+    20,        /* ldi_f */
+    8, /* ldxr_f */
+    24,        /* ldxi_f */
+    8, /* str_f */
+    20,        /* sti_f */
+    8, /* stxr_f */
+    24,        /* stxi_f */
+    8, /* bltr_f */
+    16,        /* blti_f */
+    8, /* bler_f */
+    16,        /* blei_f */
+    8, /* beqr_f */
+    16,        /* beqi_f */
+    8, /* bger_f */
+    16,        /* bgei_f */
+    8, /* bgtr_f */
+    16,        /* bgti_f */
+    8, /* bner_f */
+    16,        /* bnei_f */
+    8, /* bunltr_f */
+    16,        /* bunlti_f */
+    8, /* bunler_f */
+    16,        /* bunlei_f */
+    16,        /* buneqr_f */
+    24,        /* buneqi_f */
+    8, /* bunger_f */
+    16,        /* bungei_f */
+    8, /* bungtr_f */
+    16,        /* bungti_f */
+    16,        /* bltgtr_f */
+    24,        /* bltgti_f */
+    8, /* bordr_f */
+    16,        /* bordi_f */
+    8, /* bunordr_f */
+    16,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    12,        /* addi_d */
+    4, /* subr_d */
+    12,        /* subi_d */
+    12,        /* rsbi_d */
+    4, /* mulr_d */
+    12,        /* muli_d */
+    4, /* divr_d */
+    12,        /* divi_d */
+    4, /* negr_d */
+    4, /* absr_d */
+    4, /* sqrtr_d */
+    8, /* ltr_d */
+    16,        /* lti_d */
+    8, /* ler_d */
+    16,        /* lei_d */
+    8, /* eqr_d */
+    16,        /* eqi_d */
+    8, /* ger_d */
+    16,        /* gei_d */
+    8, /* gtr_d */
+    16,        /* gti_d */
+    8, /* ner_d */
+    16,        /* nei_d */
+    8, /* unltr_d */
+    16,        /* unlti_d */
+    8, /* unler_d */
+    16,        /* unlei_d */
+    16,        /* uneqr_d */
+    24,        /* uneqi_d */
+    8, /* unger_d */
+    16,        /* ungei_d */
+    8, /* ungtr_d */
+    16,        /* ungti_d */
+    16,        /* ltgtr_d */
+    24,        /* ltgti_d */
+    8, /* ordr_d */
+    16,        /* ordi_d */
+    8, /* unordr_d */
+    16,        /* unordi_d */
+    8, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    12,        /* movi_d */
+    8, /* ldr_d */
+    20,        /* ldi_d */
+    8, /* ldxr_d */
+    24,        /* ldxi_d */
+    8, /* str_d */
+    20,        /* sti_d */
+    8, /* stxr_d */
+    24,        /* stxi_d */
+    8, /* bltr_d */
+    16,        /* blti_d */
+    8, /* bler_d */
+    16,        /* blei_d */
+    8, /* beqr_d */
+    20,        /* beqi_d */
+    8, /* bger_d */
+    16,        /* bgei_d */
+    8, /* bgtr_d */
+    16,        /* bgti_d */
+    8, /* bner_d */
+    16,        /* bnei_d */
+    8, /* bunltr_d */
+    16,        /* bunlti_d */
+    8, /* bunler_d */
+    16,        /* bunlei_d */
+    16,        /* buneqr_d */
+    24,        /* buneqi_d */
+    8, /* bunger_d */
+    16,        /* bungei_d */
+    8, /* bungtr_d */
+    16,        /* bungti_d */
+    16,        /* bltgtr_d */
+    24,        /* bltgti_d */
+    8, /* bordr_d */
+    16,        /* bordi_d */
+    8, /* bunordr_d */
+    16,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    0, /* movr_w_f */
+    0, /* movr_ww_d */
+    0, /* movr_w_d */
+    0, /* movr_f_w */
+    0, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    0, /* movr_d_w */
+    0, /* movi_d_w */
+    8, /* clo */
+    4, /* clz */
+    12, /* cto */
+    8, /* ctz */
+#  endif
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index b54d007..243e677 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* callee save
+ * align16(lr+fp+x19+x2[0-8]+v8+v9+v1[0-5]) */
+#define stack_framesize                        160
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
+#if __APPLE__
+typedef jit_pointer_t jit_va_list_t;
+#else
 typedef struct jit_qreg {
     jit_float64_t      l;
     jit_float64_t      h;
 typedef struct jit_qreg {
     jit_float64_t      l;
     jit_float64_t      h;
@@ -52,10 +59,13 @@ typedef struct jit_va_list {
     jit_qreg_t         q6;
     jit_qreg_t         q7;
 } jit_va_list_t;
     jit_qreg_t         q6;
     jit_qreg_t         q7;
 } jit_va_list_t;
+#endif
 
 /*
  * Prototypes
  */
 
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
@@ -72,7 +82,11 @@ extern void __clear_cache(void *, void *);
  */
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x08,                  "x8" },
  */
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x08,                  "x8" },
+#if __APPLE__
+    { 0x12,                            "x18" },
+#else
     { rc(gpr) | 0x12,                  "x18" },
     { rc(gpr) | 0x12,                  "x18" },
+#endif
     { rc(gpr) | 0x11,                  "x17" },
     { rc(gpr) | 0x10,                  "x16" },
     { rc(gpr) | 0x09,                  "x9" },
     { rc(gpr) | 0x11,                  "x17" },
     { rc(gpr) | 0x10,                  "x16" },
     { rc(gpr) | 0x09,                  "x9" },
@@ -138,6 +152,14 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _R19, _R20, _R21, _R22, _R23, _R24, _R25, _R26, _R27, _R28
+};
+
+static jit_int32_t fregs[] = {
+    _V8, _V9, _V10, _V11, _V12, _V13, _V14, _V15
+};
+
 /*
  * Implementation
  */
 /*
  * Implementation
  */
@@ -198,6 +220,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -258,20 +281,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -331,7 +352,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -341,6 +362,7 @@ void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
@@ -351,6 +373,7 @@ _jit_ellipsis(jit_state_t *_jit)
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
 
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
 
+#if !__APPLE__	/* Apple: jit_va_list_t is a plain pointer, skip the register save area */
        /* Allocate va_list like object in the stack,
         * with enough space to save all argument
         * registers, and use fixed offsets for them. */
        /* Allocate va_list like object in the stack,
         * with enough space to save all argument
         * registers, and use fixed offsets for them. */
@@ -367,6 +390,7 @@ _jit_ellipsis(jit_state_t *_jit)
            _jitc->function->vafp = (8 - _jitc->function->self.argf) * -16;
        else
            _jitc->function->vafp = 0;
            _jitc->function->vafp = (8 - _jitc->function->self.argf) * -16;
        else
            _jitc->function->vafp = 0;
+#endif
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -380,7 +404,7 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
@@ -389,10 +413,22 @@ _jit_arg(jit_state_t *_jit)
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
+#if PACKED_STACK || STRONG_TYPE_CHECKING
+       assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+#if PACKED_STACK
+       _jitc->function->self.size +=
+           _jitc->function->self.size & ((1 << (code - jit_code_arg_c)) - 1);
+#endif
        offset = _jitc->function->self.size;
        offset = _jitc->function->self.size;
+#if PACKED_STACK
+       _jitc->function->self.size += 1 << (code - jit_code_arg_c);
+#else
        _jitc->function->self.size += sizeof(jit_word_t);
        _jitc->function->self.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -408,8 +444,17 @@ _jit_arg_f(jit_state_t *_jit)
     if (jit_arg_f_reg_p(_jitc->function->self.argf))
        offset = _jitc->function->self.argf++;
     else {
     if (jit_arg_f_reg_p(_jitc->function->self.argf))
        offset = _jitc->function->self.argf++;
     else {
+#if PACKED_STACK
+       _jitc->function->self.size +=
+           _jitc->function->self.size & (sizeof(jit_float32_t) - 1);
+#endif
        offset = _jitc->function->self.size;
        offset = _jitc->function->self.size;
+#if PACKED_STACK
+       _jitc->function->self.size += sizeof(jit_float32_t);
+#else
        _jitc->function->self.size += sizeof(jit_word_t);
        _jitc->function->self.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -427,8 +472,13 @@ _jit_arg_d(jit_state_t *_jit)
     if (jit_arg_f_reg_p(_jitc->function->self.argf))
        offset = _jitc->function->self.argf++;
     else {
     if (jit_arg_f_reg_p(_jitc->function->self.argf))
        offset = _jitc->function->self.argf++;
     else {
+#if PACKED_STACK
+       _jitc->function->self.size +=
+           _jitc->function->self.size & (sizeof(jit_float64_t) - 1);
+#endif
        offset = _jitc->function->self.size;
        offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
+       _jitc->function->self.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -439,111 +489,235 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     jit_inc_synth_wp(getarg_c, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_c(u, JIT_RA0 - v->u.w);
        jit_extr_c(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     jit_inc_synth_wp(getarg_uc, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_uc(u, JIT_RA0 - v->u.w);
        jit_extr_uc(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     jit_inc_synth_wp(getarg_s, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_s(u, JIT_RA0 - v->u.w);
        jit_extr_s(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     jit_inc_synth_wp(getarg_us, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_us(u, JIT_RA0 - v->u.w);
        jit_extr_us(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     jit_inc_synth_wp(getarg_i, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK || __WORDSIZE == 32
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_i(u, JIT_RA0 - v->u.w);
        jit_extr_i(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
+#if __WORDSIZE == 64
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     jit_inc_synth_wp(getarg_ui, u, v);
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_reg_p(v->u.w)) {
+#if PACKED_STACK
+       jit_movr(u, JIT_RA0 - v->u.w);
+#else
        jit_extr_ui(u, JIT_RA0 - v->u.w);
        jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
+#endif
+    }
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
     jit_dec_synth();
 }
+#endif
 
 void
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr(JIT_RA0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_int32_t     regno = JIT_RA0 - v->u.w;
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_putargr_c:    jit_extr_c(regno, u);   break;
+           case jit_code_putargr_uc:   jit_extr_uc(regno, u);  break;
+           case jit_code_putargr_s:    jit_extr_s(regno, u);   break;
+           case jit_code_putargr_us:   jit_extr_us(regno, u);  break;
+#  if __WORDSIZE == 32	/* int is the full word: plain move */
+           case jit_code_putargr_i:    jit_movr(regno, u);     break;
+#  else			/* 64-bit word: extend int/uint, long is a plain move */
+           case jit_code_putargr_i:    jit_extr_i(regno, u);   break;
+           case jit_code_putargr_ui:   jit_extr_ui(regno, u);  break;
+           case jit_code_putargr_l:    jit_movr(regno, u);     break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       jit_movr(regno, u);
+#endif
+    }
+    else {
+       jit_node_t      *node;
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_putargr_c:    case jit_code_putargr_uc:
+               node = jit_stxi_c(v->u.w, JIT_FP, u);           break;
+           case jit_code_putargr_s:    case jit_code_putargr_us:
+               node = jit_stxi_s(v->u.w, JIT_FP, u);           break;
+#  if __WORDSIZE == 32
+           case jit_code_putargr_i:
+               node = jit_stxi(v->u.w, JIT_FP, u);             break;
+#  else
+           case jit_code_putargr_i:    case jit_code_putargr_ui:
+               node = jit_stxi_i(v->u.w, JIT_FP, u);           break;
+           case jit_code_putargr_l:
+               node = jit_stxi(v->u.w, JIT_FP, u);             break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       node = jit_stxi(v->u.w, JIT_FP, u);
+#endif
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
+#if PACKED_STACK
+    switch (code) {
+       case jit_code_putargi_c:        u = (jit_int8_t)u;      break;
+       case jit_code_putargi_uc:       u = (jit_uint8_t)u;     break;
+       case jit_code_putargi_s:        u = (jit_int16_t)u;     break;
+       case jit_code_putargi_us:       u = (jit_uint16_t)u;    break;
+#  if __WORDSIZE == 32
+       case jit_code_putargi_i:                                break;
+#  else
+       case jit_code_putargi_i:        u = (jit_int32_t)u;     break;
+       case jit_code_putargi_ui:       u = (jit_uint32_t)u;    break;
+       case jit_code_putargi_l:                                break;
+#  endif
+       default:                        abort();                break;
+    }
+#endif
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_putargi_c:    case jit_code_putargi_uc:
+               node = jit_stxi_c(v->u.w, JIT_FP, regno);       break;
+           case jit_code_putargi_s:    case jit_code_putargi_us:
+               node = jit_stxi_s(v->u.w, JIT_FP, regno);       break;
+#  if __WORDSIZE == 32
+           case jit_code_putargi_i:
+               node = jit_stxi(v->u.w, JIT_FP, regno);         break;
+#  else
+           case jit_code_putargi_i:    case jit_code_putargi_ui:
+               node = jit_stxi_i(v->u.w, JIT_FP, regno);       break;
+           case jit_code_putargi_l:
+               node = jit_stxi(v->u.w, JIT_FP, regno);         break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       node = jit_stxi(v->u.w, JIT_FP, regno);
+#endif
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -556,8 +730,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(getarg_f, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(u, JIT_FA0 - v->u.w);
     jit_inc_synth_wp(getarg_f, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(u, JIT_FA0 - v->u.w);
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -568,8 +744,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(putargr_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_f(JIT_FA0 - v->u.w, u);
     jit_inc_synth_wp(putargr_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_f(JIT_FA0 - v->u.w, u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -582,9 +760,11 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(JIT_FA0 - v->u.w, u);
     else {
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(JIT_FA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
+       node = jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -597,8 +777,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_d(u, JIT_FA0 - v->u.w);
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movr_d(u, JIT_FA0 - v->u.w);
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -609,8 +791,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     jit_inc_synth_wp(putargr_d, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(JIT_FA0 - v->u.w, u);
     jit_inc_synth_wp(putargr_d, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(JIT_FA0 - v->u.w, u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -623,48 +807,161 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(JIT_FA0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(JIT_FA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
+       node = jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
+       jit_int32_t     regno = JIT_RA0 - _jitc->function->call.argi;
+#if PACKED_STACK
+       switch (code) {
+           case jit_code_pushargr_c:   jit_extr_c(regno, u);   break;
+           case jit_code_pushargr_uc:  jit_extr_uc(regno, u);  break;
+           case jit_code_pushargr_s:   jit_extr_s(regno, u);   break;
+           case jit_code_pushargr_us:  jit_extr_us(regno, u);  break;
+#  if __WORDSIZE == 32
+           case jit_code_pushargr_i:   jit_movr(regno, u);     break;
+#  else
+           case jit_code_pushargr_i:   jit_extr_i(regno, u);   break;
+           case jit_code_pushargr_ui:  jit_extr_ui(regno, u);  break;
+           case jit_code_pushargr_l:   jit_movr(regno, u);     break;
+#  endif
+           default:                    abort();                break;
+       }
+#else
+       jit_movr(regno, u);
+#endif
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(code == jit_code_pushargr);
+           jit_stxi(_jitc->function->call.size, JIT_SP, u);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size &
+               ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1);
+       switch (code) {
+           case jit_code_pushargr_c:   case jit_code_pushargr_uc:
+               jit_stxi_c(_jitc->function->call.size, JIT_SP, u);
+               break;
+           case jit_code_pushargr_s:   case jit_code_pushargr_us:
+               jit_stxi_s(_jitc->function->call.size, JIT_SP, u);
+               break;
+#  if __WORDSIZE == 32
+           case jit_code_pushargr_i:
+               jit_stxi(_jitc->function->call.size, JIT_SP, u);
+               break;
+#  else
+           case jit_code_pushargr_i:   case jit_code_pushargr_ui:
+               jit_stxi_i(_jitc->function->call.size, JIT_SP, u);
+               break;
+           case jit_code_pushargr_l:
+               jit_stxi(_jitc->function->call.size, JIT_SP, u);
+               break;
+#  endif
+           default:
+               abort();
+               break;
+       }
+       _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2);
+#else
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     jit_link_prepare();
+#if PACKED_STACK
+    switch (code) {
+       case jit_code_pushargi_c:       u = (jit_int8_t)u;      break;
+       case jit_code_pushargi_uc:      u = (jit_uint8_t)u;     break;
+       case jit_code_pushargi_s:       u = (jit_int16_t)u;     break;
+       case jit_code_pushargi_us:      u = (jit_uint16_t)u;    break;
+#  if __WORDSIZE == 32
+       case jit_code_pushargi_i:                               break;
+#  else
+       case jit_code_pushargi_i:       u = (jit_int32_t)u;     break;
+       case jit_code_pushargi_ui:      u = (jit_uint32_t)u;    break;
+       case jit_code_pushargi_l:                               break;
+#  endif
+       default:                        abort();                break;
+    }
+#endif
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
+       regno = JIT_RA0 - _jitc->function->call.argi;
+       jit_movi(regno, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(code == jit_code_pushargi);
+           jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argi;
     }
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        ++_jitc->function->call.argi;
     }
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size &
+               ((1 << ((code - jit_code_pushargr_c) >> 2)) - 1);
+       switch (code) {
+           case jit_code_pushargi_c:   case jit_code_pushargi_uc:
+               jit_stxi_c(_jitc->function->call.size, JIT_SP, regno);
+               break;
+           case jit_code_pushargi_s:   case jit_code_pushargi_us:
+               jit_stxi_s(_jitc->function->call.size, JIT_SP, regno);
+               break;
+#  if __WORDSIZE == 32
+           case jit_code_pushargi_i:
+               jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+               break;
+#  else
+           case jit_code_pushargi_i:   case jit_code_pushargi_ui:
+               jit_stxi_i(_jitc->function->call.size, JIT_SP, regno);
+               break;
+           case jit_code_pushargi_l:
+               jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+               break;
+#  endif
+           default:
+               abort();
+               break;
+       }
+       _jitc->function->call.size += 1 << ((code - jit_code_pushargr_c) >> 2);
+#else
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -677,11 +974,27 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float32_t) == sizeof(jit_word_t));
+           jit_stxi_f(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
        ++_jitc->function->call.argf;
     }
     else {
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float32_t) - 1);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += sizeof(jit_float32_t);
+#else
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -695,14 +1008,30 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float32_t) == sizeof(jit_word_t));
+           jit_stxi_f(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_word_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float32_t) - 1);
+       jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
+       _jitc->function->call.size += sizeof(jit_float32_t);
+#else
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        _jitc->function->call.size += sizeof(jit_word_t);
+#endif
+       jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -715,11 +1044,24 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float64_t) == sizeof(jit_word_t));
+           jit_stxi_d(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
        ++_jitc->function->call.argf;
     }
     else {
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float64_t) - 1);
+#endif
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -733,14 +1075,27 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
     jit_link_prepare();
     if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
        jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
+#if __APPLE__
+       if (_jitc->function->call.call & jit_call_varargs) {
+           assert(sizeof(jit_float64_t) == sizeof(jit_word_t));
+           jit_stxi_d(_jitc->function->call.size, JIT_SP,
+                      JIT_FA0 - _jitc->function->call.argf);
+           _jitc->function->call.size += sizeof(jit_float64_t);
+       }
+#endif
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
+#if PACKED_STACK
+       _jitc->function->call.size +=
+           _jitc->function->call.size & (sizeof(jit_float64_t) - 1);
+#endif
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -769,7 +1124,12 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     jit_inc_synth_w(finishr, r0);
+#if PACKED_STACK
+    _jitc->function->call.size +=
+       _jitc->function->call.size & (sizeof(jit_word_t) - 1);
+#endif
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     node = jit_callr(r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     node = jit_callr(r0);
@@ -786,7 +1146,12 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     jit_inc_synth_w(finishi, (jit_word_t)i0);
+#if PACKED_STACK
+    _jitc->function->call.size +=
+       _jitc->function->call.size & (sizeof(jit_word_t) - 1);
+#endif
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     node = jit_calli(i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     node = jit_calli(i0);
@@ -835,10 +1200,15 @@ void
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_i, r0);
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_i, r0);
+#if __WORDSIZE == 32
+    jit_movr(r0, JIT_RET);
+#else
     jit_extr_i(r0, JIT_RET);
     jit_extr_i(r0, JIT_RET);
+#endif
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
+#if __WORDSIZE == 64
 void
 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 {
 void
 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -851,10 +1221,10 @@ void
 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_l, r0);
 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_inc_synth_w(retval_l, r0);
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
+    jit_movr(r0, JIT_RET);
     jit_dec_synth();
 }
     jit_dec_synth();
 }
+#endif
 
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
 
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
@@ -886,6 +1256,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1023,6 +1394,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1070,6 +1444,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1391,6 +1769,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1401,17 +1780,26 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s26_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
+               jit_check_frame();
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
@@ -1419,7 +1807,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s26_p(word))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -1430,11 +1823,14 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
                _jitc->again = 0;
                prolog(node);
                break;
@@ -1450,10 +1846,22 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
                    goto restart_function;
                }
                /* remember label is defined */
@@ -1474,11 +1882,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1486,16 +1906,34 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_retval_s:             case jit_code_retval_us:
            case jit_code_retval_i:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_retval_s:             case jit_code_retval_us:
            case jit_code_retval_i:
+#if __WORDSIZE == 64
            case jit_code_retval_ui:            case jit_code_retval_l:
            case jit_code_retval_ui:            case jit_code_retval_l:
+#endif
            case jit_code_retval_f:             case jit_code_retval_d:
            case jit_code_prepare:
            case jit_code_finishr:              case jit_code_finishi:
            case jit_code_retval_f:             case jit_code_retval_d:
            case jit_code_prepare:
            case jit_code_finishr:              case jit_code_finishi:
@@ -1571,6 +2009,23 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    _jitc->framesize = 16;     /* ra+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    for (reg = 0; reg < jit_size(fregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index 40f3126..91d15c8 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -320,6 +320,12 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEGQ(r1,r0)
 #  define comr(r0,r1)                  NOT(r1,r0)
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEGQ(r1,r0)
 #  define comr(r0,r1)                  NOT(r1,r0)
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 CTLZ(r1, r0)
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 CTTZ(r1, r0)
 #  define addr(r0,r1,r2)               ADDQ(r1,r2,r0)
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define addr(r0,r1,r2)               ADDQ(r1,r2,r0)
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -637,7 +643,7 @@ static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define jmpr(r0)                     JMP(_R31_REGNO,r0,0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
 static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define jmpr(r0)                     JMP(_R31_REGNO,r0,0)
 #  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 #define callr(r0)                      _callr(_jit,r0)
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 #define callr(r0)                      _callr(_jit,r0)
@@ -825,7 +831,7 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     }
     again = _jit->pc.w;                        /* AGAIN */
     LDQ_L(r0, r1, 0);                  /* Load r0 locked */
     }
     again = _jit->pc.w;                        /* AGAIN */
     LDQ_L(r0, r1, 0);                  /* Load r0 locked */
-    jump0 = bner(0, r0, r2);           /* bne FAIL r0 r2 */
+    jump0 = bner(_jit->pc.w, r0, r2);  /* bne FAIL r0 r2 */
     movr(r0, r3);                      /* Move to r0 to attempt to store */
     STQ_C(r0, r1, 0);                  /* r0 is an in/out argument */
     jump1 = _jit->pc.w;
     movr(r0, r3);                      /* Move to r0 to attempt to store */
     STQ_C(r0, r1, 0);                  /* r0 is an in/out argument */
     jump1 = _jit->pc.w;
@@ -840,6 +846,20 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
         jit_unget_reg(r1_reg);
 }
 
         jit_unget_reg(r1_reg);
 }
 
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);
+    ctzr(r0, r0);
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2543,7 +2563,7 @@ _bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(t0);
 }
 
     jit_unget_reg(t0);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         w;
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         w;
@@ -2553,7 +2573,8 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
     if (_s21_p(d))
        BR(_R31_REGNO, d);
     else
     if (_s21_p(d))
        BR(_R31_REGNO, d);
     else
-       (void)jmpi_p(i0);
+       w = jmpi_p(i0);
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
index 5452a1e..83736b7 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index ac314f2..14949b2 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 168
+#define JIT_INSTR_MAX 88
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    4, /* align */
+    12,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     20,        /* va_start */
     24,        /* va_arg */
     44,        /* va_arg_d */
     20,        /* va_start */
     24,        /* va_arg */
     44,        /* va_arg_d */
     32,        /* movi */
     4, /* movnr */
     4, /* movzr */
     32,        /* movi */
     4, /* movnr */
     4, /* movzr */
+    32,        /* casr */
+    60,        /* casi */
     8, /* extr_c */
     8, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     8, /* extr_i */
     8, /* extr_ui */
     8, /* extr_c */
     8, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     8, /* extr_i */
     8, /* extr_ui */
+    16,        /* bswapr_us */
+    36,        /* bswapr_ui */
+    36,        /* bswapr_ul */
     16,        /* htonr_us */
     36,        /* htonr_ui */
     36,        /* htonr_ul */
     16,        /* htonr_us */
     36,        /* htonr_ui */
     36,        /* htonr_ul */
     16,        /* bxsubr_u */
     16,        /* bxsubi_u */
     4, /* jmpr */
     16,        /* bxsubr_u */
     16,        /* bxsubi_u */
     4, /* jmpr */
-    36,        /* jmpi */
+    4, /* jmpi */
     8, /* callr */
     36,        /* calli */
     0, /* prepare */
     8, /* callr */
     36,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    16,        /* bswapr_us */
-    36,        /* bswapr_ui */
-    36,        /* bswapr_ul */
-    32,        /* casr */
-    60,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    8, /* cto */
+    4, /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index 678d5c6..25566f4 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -246,20 +246,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -319,7 +317,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -361,18 +359,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function != NULL);
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function != NULL);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += 8;
     }
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += 8;
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -417,7 +419,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
@@ -429,7 +431,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
@@ -441,7 +443,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
@@ -453,7 +455,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
@@ -465,7 +467,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _A0 - v->u.w);
@@ -477,7 +479,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
@@ -489,7 +491,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
@@ -499,10 +501,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
     else
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
     else
@@ -511,11 +513,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
@@ -610,10 +612,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function != NULL);
 {
     assert(_jitc->function != NULL);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_A0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_A0 - _jitc->function->call.argi, u);
@@ -627,11 +629,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_int64_t u)
+_jit_pushargi(jit_state_t *_jit, jit_int64_t u, jit_code_t code)
 {
     jit_int32_t                regno;
     assert(_jitc->function != NULL);
 {
     jit_int32_t                regno;
     assert(_jitc->function != NULL);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_A0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_A0 - _jitc->function->call.argi, u);
@@ -863,6 +865,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -983,6 +986,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1124,6 +1130,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
                break;
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1361,7 +1371,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (_s21_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -1390,6 +1405,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1410,6 +1426,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1434,11 +1460,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:                case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1446,10 +1479,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index 12f9a2f..a0852a2 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -37,8 +37,7 @@
 #  define jit_armv5e_p()               (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
 #  define jit_armv6_p()                        (jit_cpu.version >= 6)
 #  define jit_armv7_p()                        (jit_cpu.version >= 7)
 #  define jit_armv5e_p()               (jit_cpu.version > 5 || (jit_cpu.version == 5 && jit_cpu.extend))
 #  define jit_armv6_p()                        (jit_cpu.version >= 6)
 #  define jit_armv7_p()                        (jit_cpu.version >= 7)
-#  define jit_armv7r_p()               0
-#  define stack_framesize              48
+#  define jit_armv7r_p()               (jit_cpu.version > 7 || (jit_cpu.version == 7 && jit_cpu.extend))
 extern int     __aeabi_idivmod(int, int);
 extern unsigned        __aeabi_uidivmod(unsigned, unsigned);
 #  define _R0_REGNO                    0x00
 extern int     __aeabi_idivmod(int, int);
 extern unsigned        __aeabi_uidivmod(unsigned, unsigned);
 #  define _R0_REGNO                    0x00
@@ -141,8 +140,12 @@ extern unsigned    __aeabi_uidivmod(unsigned, unsigned);
 #  define THUMB2_UMULL                 0xfba00000
 #  define ARM_SMULL                    0x00c00090
 #  define THUMB2_SMULL                 0xfb800000
 #  define THUMB2_UMULL                 0xfba00000
 #  define ARM_SMULL                    0x00c00090
 #  define THUMB2_SMULL                 0xfb800000
+/* >> ARMv7r */
+#  define ARM_SDIV                     0x07100010
+#  define ARM_UDIV                     0x07300010
 #  define THUMB2_SDIV                  0xfb90f0f0
 #  define THUMB2_UDIV                  0xfbb0f0f0
 #  define THUMB2_SDIV                  0xfb90f0f0
 #  define THUMB2_UDIV                  0xfbb0f0f0
+/* << ARMv7r */
 #  define ARM_AND                      0x00000000
 #  define THUMB_AND                        0x4000
 #  define THUMB2_AND                   0xea000000
 #  define ARM_AND                      0x00000000
 #  define THUMB_AND                        0x4000
 #  define THUMB2_AND                   0xea000000
@@ -185,6 +188,12 @@ extern unsigned    __aeabi_uidivmod(unsigned, unsigned);
 #  define ARM_STREX                    0x01800090
 #  define THUMB2_STREX                 0xe8400000
 /* << ARMv6* */
 #  define ARM_STREX                    0x01800090
 #  define THUMB2_STREX                 0xe8400000
 /* << ARMv6* */
+/* >> ARMv6t2 */
+#  define THUMB2_CLZ                   0xfab0f080
+#  define THUMB2_RBIT                  0xfa90f0a0
+#  define ARM_RBIT                     0x06f00030
+/* << ARMv6t2 */
+#  define ARM_CLZ                      0x01600010
 /* >> ARMv7 */
 #  define ARM_DMB                      0xf57ff050
 #  define THUMB2_DMB                   0xf3bf8f50
 /* >> ARMv7 */
 #  define ARM_DMB                      0xf57ff050
 #  define THUMB2_DMB                   0xf3bf8f50
@@ -447,6 +456,12 @@ static void _tdmb(jit_state_t *_jit, int im);
 #  define NOT(rd,rm)                   CC_NOT(ARM_CC_AL,rd,rm)
 #  define T1_NOT(rd,rm)                        T1_MVN(rd,rm)
 #  define T2_NOT(rd,rm)                        T2_MVN(rd,rm)
 #  define NOT(rd,rm)                   CC_NOT(ARM_CC_AL,rd,rm)
 #  define T1_NOT(rd,rm)                        T1_MVN(rd,rm)
 #  define T2_NOT(rd,rm)                        T2_MVN(rd,rm)
+#  define T2_CLZ(rd,rm)                        torrr(THUMB2_CLZ,rm,rd,rm)
+#  define CC_CLZ(cc,rd,rm)             corrrr(cc,ARM_CLZ,_R15_REGNO,rd,_R15_REGNO,rm)
+#  define CLZ(rd,rm)                   CC_CLZ(ARM_CC_AL,rd,rm)
+#  define T2_RBIT(rd,rm)               torrr(THUMB2_RBIT,rm,rd,rm)
+#  define CC_RBIT(cc,rd,rm)            corrrr(cc,ARM_RBIT,_R15_REGNO,rd,_R15_REGNO,rm)
+#  define RBIT(rd,rm)                  CC_RBIT(ARM_CC_AL,rd,rm)
 #  define NOP()                                MOV(_R0_REGNO, _R0_REGNO)
 #  define T1_NOP()                     is(0xbf00)
 #  define CC_ADD(cc,rd,rn,rm)          corrr(cc,ARM_ADD,rn,rd,rm)
 #  define NOP()                                MOV(_R0_REGNO, _R0_REGNO)
 #  define T1_NOP()                     is(0xbf00)
 #  define CC_ADD(cc,rd,rn,rm)          corrr(cc,ARM_ADD,rn,rd,rm)
@@ -524,6 +539,10 @@ static void _tdmb(jit_state_t *_jit, int im);
 #  define CC_UMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
 #  define UMULL(rl,rh,rn,rm)           CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
 #  define T2_UMULL(rl,rh,rn,rm)                torrrr(THUMB2_UMULL,rn,rl,rh,rm)
 #  define CC_UMULL(cc,rl,rh,rn,rm)     corrrr(cc,ARM_UMULL,rh,rl,rm,rn)
 #  define UMULL(rl,rh,rn,rm)           CC_UMULL(ARM_CC_AL,rl,rh,rn,rm)
 #  define T2_UMULL(rl,rh,rn,rm)                torrrr(THUMB2_UMULL,rn,rl,rh,rm)
+#  define CC_SDIV(cc,rd,rn,rm)         corrrr(cc,ARM_SDIV,rd,15,rn,rm)
+#  define SDIV(rd,rn,rm)               CC_SDIV(ARM_CC_AL,rd,rm,rn)
+#  define CC_UDIV(cc,rd,rn,rm)         corrrr(cc,ARM_UDIV,rd,15,rn,rm)
+#  define UDIV(rd,rn,rm)               CC_UDIV(ARM_CC_AL,rd,rm,rn)
 #  define T2_SDIV(rd,rn,rm)            torrr(THUMB2_SDIV,rn,rd,rm)
 #  define T2_UDIV(rd,rn,rm)            torrr(THUMB2_UDIV,rn,rd,rm)
 #  define CC_AND(cc,rd,rn,rm)          corrr(cc,ARM_AND,rn,rd,rm)
 #  define T2_SDIV(rd,rn,rm)            torrr(THUMB2_SDIV,rn,rd,rm)
 #  define T2_UDIV(rd,rn,rm)            torrr(THUMB2_UDIV,rn,rd,rm)
 #  define CC_AND(cc,rd,rn,rm)          corrr(cc,ARM_AND,rn,rd,rm)
@@ -852,6 +871,8 @@ static void _tdmb(jit_state_t *_jit, int im);
 #  define T2_POP(im)                   tpp(THUMB2_POP,im)
 #  define jit_get_reg_args()                                           \
     do {                                                               \
 #  define T2_POP(im)                   tpp(THUMB2_POP,im)
 #  define jit_get_reg_args()                                           \
     do {                                                               \
+       CHECK_REG_ARGS();                                               \
+       jit_check_frame();                                              \
        (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R0|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R1|jit_class_named|jit_class_gpr);           \
        (void)jit_get_reg(_R2|jit_class_named|jit_class_gpr);           \
@@ -885,6 +906,14 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  _negr(_jit,r0,r1)
 static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define negr(r0,r1)                  _negr(_jit,r0,r1)
 static void _negr(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2)
 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
 #  define addr(r0,r1,r2)               _addr(_jit,r0,r1,r2)
 static void _addr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
@@ -1147,10 +1176,10 @@ static void _prolog(jit_state_t*,jit_node_t*);
 static void _epilog(jit_state_t*,jit_node_t*);
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
 static void _epilog(jit_state_t*,jit_node_t*);
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
-#  define calli_p(i0)                  _calli_p(_jit,i0)
-static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define calli(i0,i1)                 _calli(_jit,i0,i1)
+static void _calli(jit_state_t*,jit_word_t,jit_bool_t);
+#  define calli_p(i0,i1)               _calli_p(_jit,i0,i1)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_bool_t);
 #  define vastart(r0)                  _vastart(_jit, r0)
 static void _vastart(jit_state_t*, jit_int32_t);
 #  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
 #  define vastart(r0)                  _vastart(_jit, r0)
 static void _vastart(jit_state_t*, jit_int32_t);
 #  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
@@ -1526,7 +1555,7 @@ _tpp(jit_state_t *_jit, int o, int im)
     assert(!(o & 0x0000ffff));
     if (o == THUMB2_PUSH)
        assert(!(im & 0x8000));
     assert(!(o & 0x0000ffff));
     if (o == THUMB2_PUSH)
        assert(!(im & 0x8000));
-    assert(__builtin_popcount(im & 0x1fff) > 1);
+    assert(__builtin_popcount(im & 0x7fff) > 1);
     thumb.i = o|im;
     iss(thumb.s[0], thumb.s[1]);
 }
     thumb.i = o|im;
     iss(thumb.s[0], thumb.s[1]);
 }
@@ -1737,6 +1766,53 @@ _negr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
        RSBI(r0, r1, 0);
 }
 
        RSBI(r0, r1, 0);
 }
 
+/* Emit code that stores in r0 the count of leading zero bits of r1.
+ * Uses the CLZ instruction when the target provides it (ARM mode on
+ * armv5e or later, Thumb-2 on armv7), otherwise the generic fallback
+ * sequence. */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_thumb_p() && jit_armv5e_p())
+       CLZ(r0, r1);
+    else if (jit_thumb_p() && jit_armv7_p()) { /* armv6t2 actually */
+       T2_CLZ(r0, r1);
+    }
+    else
+       /* The source operand is r1, exactly as in the branches above;
+        * passing r0 twice would count the zeros of whatever value the
+        * destination register happened to hold. */
+       fallback_clz(r0, r1);
+}
+
+/* Emit code that stores in r0 the count of leading one bits of r1:
+ * complement r1 into r0, then count the leading zeros of the result. */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/* Emit code that stores in r0 the count of trailing one bits of r1.
+ * With a bit-reverse instruction available the problem is turned into
+ * a leading-ones count; otherwise the generic fallback is emitted. */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* No RBIT on this target: use the generic sequence. */
+    if (!jit_armv7_p()) {
+       fallback_cto(r0, r1);
+       return;
+    }
+    /* armv6t2 actually */
+    if (!jit_thumb_p())
+       RBIT(r0, r1);
+    else
+       T2_RBIT(r0, r1);
+    /* r0 now holds r1 bit-reversed, so trailing ones became leading ones. */
+    clor(r0, r0);
+}
+
+/* Emit code that stores in r0 the count of trailing zero bits of r1.
+ * With a bit-reverse instruction available the problem is turned into
+ * a leading-zeros count; otherwise the generic fallback is emitted. */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* No RBIT on this target: use the generic sequence. */
+    if (!jit_armv7_p()) {
+       fallback_ctz(r0, r1);
+       return;
+    }
+    /* armv6t2 actually */
+    if (!jit_thumb_p())
+       RBIT(r0, r1);
+    else
+       T2_RBIT(r0, r1);
+    /* r0 now holds r1 bit-reversed, so trailing zeros became leading zeros. */
+    clzr(r0, r0);
+}
+
 static void
 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2232,8 +2308,12 @@ _divrem(jit_state_t *_jit, int div, int sign,
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    if (jit_armv7r_p() && jit_thumb_p())
-       T2_SDIV(r0, r1, r2);
+    if (jit_armv7r_p()) {
+       if (jit_thumb_p())
+           T2_SDIV(r0, r1, r2);
+       else
+           SDIV(r0, r1, r2);
+    }
     else
        divrem(1, 1, r0, r1, r2);
 }
     else
        divrem(1, 1, r0, r1, r2);
 }
@@ -2251,8 +2331,12 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    if (jit_armv7r_p() && jit_thumb_p())
-       T2_UDIV(r0, r1, r2);
+    if (jit_armv7r_p()) {
+       if (jit_thumb_p())
+           T2_UDIV(r0, r1, r2);
+       else
+           UDIV(r0, r1, r2);
+    }
     else
        divrem(1, 0, r0, r1, r2);
 }
     else
        divrem(1, 0, r0, r1, r2);
 }
@@ -2312,7 +2396,23 @@ _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divrem(0, 1, r0, r1, r2);
+    if (jit_armv7r_p()) {
+       jit_int32_t             reg;
+       if (r0 == r1 || r0 == r2) {
+           reg = jit_get_reg(jit_class_gpr);
+           divr(rn(reg), r1, r2);
+           mulr(rn(reg), r2, rn(reg));
+           subr(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else {
+           divr(r0, r1, r2);
+           mulr(r0, r2, r0);
+           subr(r0, r1, r0);
+       }
+    }
+    else
+       divrem(0, 1, r0, r1, r2);
 }
 
 static void
 }
 
 static void
@@ -2328,7 +2428,23 @@ _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divrem(0, 0, r0, r1, r2);
+    if (jit_armv7r_p()) {
+       jit_int32_t             reg;
+       if (r0 == r1 || r0 == r2) {
+           reg = jit_get_reg(jit_class_gpr);
+           divr_u(rn(reg), r1, r2);
+           mulr(rn(reg), r2, rn(reg));
+           subr(r0, r1, rn(reg));
+           jit_unget_reg(reg);
+       }
+       else {
+           divr_u(r0, r1, r2);
+           mulr(r0, r2, r0);
+           subr(r0, r1, r0);
+       }
+    }
+    else
+       divrem(0, 0, r0, r1, r2);
 }
 
 static void
 }
 
 static void
@@ -2741,8 +2857,8 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
     jit_word_t         w;
     jit_word_t         d;
     jit_int32_t                reg;
     jit_word_t         w;
     jit_word_t         d;
     jit_int32_t                reg;
+    /* i1 means jump is reachable in signed 24 bits  */
     if (i1) {
     if (i1) {
-       /* Assume jump is not longer than 23 bits if inside jit */
        w = _jit->pc.w;
        /* if thumb and in thumb mode */
        if (jit_thumb_p() && _jitc->thumb) {
        w = _jit->pc.w;
        /* if thumb and in thumb mode */
        if (jit_thumb_p() && _jitc->thumb) {
@@ -3835,14 +3951,29 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
 }
 
 static void
 }
 
 static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t exchange_p)
 {
     jit_word_t         d;
     jit_int32_t                reg;
 {
     jit_word_t         d;
     jit_int32_t                reg;
-    d = ((i0 - _jit->pc.w) >> 2) - 2;
-    if (!jit_exchange_p() && !jit_thumb_p() && _s24P(d))
-       BLI(d & 0x00ffffff);
+    if (!exchange_p) {
+       if (jit_thumb_p()) {
+           if (jit_exchange_p())
+               /* skip switch from  arm to thumb 
+                * exchange_p set to zero means a jit function
+                * call in the same jit code buffer */
+               d = ((i0 + 8 - _jit->pc.w) >> 1) - 2;
+           else
+               d = ((i0 - _jit->pc.w) >> 1) - 2;
+       }
+       else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
+       if (_s24P(d)) {
+           if (jit_thumb_p())  T2_BLI(encode_thumb_jump(d));
+           else                BLI(d & 0x00ffffff);
+       }
+       else                    goto fallback;
+    }
     else {
     else {
+    fallback:
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        if (jit_thumb_p())
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        if (jit_thumb_p())
@@ -3854,28 +3985,44 @@ _calli(jit_state_t *_jit, jit_word_t i0)
 }
 
 static jit_word_t
 }
 
 static jit_word_t
-_calli_p(jit_state_t *_jit, jit_word_t i0)
+_calli_p(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
 {
     jit_word_t         w;
 {
     jit_word_t         w;
+    jit_word_t         d;
     jit_int32_t                reg;
     jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    w = _jit->pc.w;
-    movi_p(rn(reg), i0);
-    if (jit_thumb_p())
-       T1_BLX(rn(reg));
-    else
-       BLX(rn(reg));
-    jit_unget_reg(reg);
+    /* i1 means call is reachable in signed 24 bits  */
+    if (i1) {
+       w = _jit->pc.w;
+       if (jit_thumb_p())      d = ((i0 - _jit->pc.w) >> 1) - 2;
+       else                    d = ((i0 - _jit->pc.w) >> 2) - 2;
+       assert(_s24P(d));
+       if (jit_thumb_p())      T2_BLI(encode_thumb_jump(d));
+       else                    BLI(d & 0x00ffffff);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       w = _jit->pc.w;
+       movi_p(rn(reg), i0);
+       if (jit_thumb_p())
+           T1_BLX(rn(reg));
+       else
+           BLX(rn(reg));
+       jit_unget_reg(reg);
+    }
     return (w);
 }
 
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
     return (w);
 }
 
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, mask, count;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        assert(_jitc->function->self.aoff >= frame);
+       if (jit_swf_p())
+           CHECK_SWF_OFFSET();
+       CHECK_REG_ARGS();
        if (_jitc->function->assume_frame) {
            if (jit_thumb_p() && !_jitc->thumb)
                _jitc->thumb = _jit->pc.w;
        if (_jitc->function->assume_frame) {
            if (jit_thumb_p() && !_jitc->thumb)
                _jitc->thumb = _jit->pc.w;
@@ -3888,38 +4035,66 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
+    /* If this jit_check_frame() succeeds, it actually is just a need_stack,
+     * usually for arguments, so, allocai was not called, but pusharg*
+     * was called increasing stack size, for negative access offsets.
+     * This can be optimized for one less prolog instruction, that is,
+     * do not create the frame pointer, and only add _jitc->function->stack
+     * to sp, and on epilog, instead of moving fp to sp, just add negative
+     * value of _jitc->function->stack. Since this condition requires a
+     * large function body for excess arguments to called function, keep
+     * things a bit simpler for now, as this is the only place need_stack
+     * would be useful. */
+    if (_jitc->function->stack)
+       jit_check_frame();
+
+    for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           mask |= 1 << rn(iregs[reg]);
+           ++count;
+       }
+    }
+    /* One extra register to keep stack 8 bytes aligned */
+    if (count & 1) {
+       for (reg = 4; reg < 10; reg++) {
+           if (!(mask & (1 << reg))) {
+               mask |= 1 << reg;
+               break;
+           }
+       }
+    }
+    if (_jitc->function->need_frame || _jitc->function->need_return)
+       mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+    if (!jit_swf_p() && _jitc->function->save_reg_args &&
+       !(_jitc->function->self.call & jit_call_varargs))
+       mask |= 0xf;
 
     if (jit_thumb_p()) {
        /*  switch to thumb mode (better approach would be to
         * ORR 1 address being called, but no clear distinction
         * of what is a pointer to a jit function, or if patching
         * a pointer to a jit function) */
 
     if (jit_thumb_p()) {
        /*  switch to thumb mode (better approach would be to
         * ORR 1 address being called, but no clear distinction
         * of what is a pointer to a jit function, or if patching
         * a pointer to a jit function) */
-       ADDI(_R12_REGNO, _R15_REGNO, 1);
-       BX(_R12_REGNO);
+       if (jit_exchange_p()) {
+           ADDI(_R12_REGNO, _R15_REGNO, 1);
+           BX(_R12_REGNO);
+       }
        if (!_jitc->thumb)
            _jitc->thumb = _jit->pc.w;
        if (!_jitc->thumb)
            _jitc->thumb = _jit->pc.w;
-       if (jit_cpu.abi) {
-           T2_PUSH(0xf);
-           T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-           VPUSH_F64(_D8_REGNO, 8);
-       }
-       else {
+       if (jit_swf_p() || (_jitc->function->save_reg_args &&
+                           (_jitc->function->self.call & jit_call_varargs)))
            T2_PUSH(0xf);
            T2_PUSH(0xf);
-           T2_PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-       }
+       if (mask)
+           T2_PUSH(mask);
     }
     else {
     }
     else {
-       if (jit_cpu.abi) {
-           PUSH(0xf);
-           PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-           VPUSH_F64(_D8_REGNO, 8);
-       }
-       else {
+       if (jit_swf_p() || (_jitc->function->save_reg_args &&
+                           (_jitc->function->self.call & jit_call_varargs)))
            PUSH(0xf);
            PUSH(0xf);
-           PUSH(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-       }
+       if (mask)
+           PUSH(mask);
     }
     }
-    movr(_FP_REGNO, _SP_REGNO);
+    if (_jitc->function->need_frame)
+       movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
@@ -3933,17 +4108,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, mask, count;
     if (_jitc->function->assume_frame)
        return;
 
     if (_jitc->function->assume_frame)
        return;
 
-    movr(_SP_REGNO, _FP_REGNO);
-    if (jit_cpu.abi)
-       VPOP_F64(_D8_REGNO, 8);
-    if (jit_thumb_p())
-       T2_POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-    else
-       POP(0x3f0|(1<<_FP_REGNO)|(1<<_LR_REGNO));
-    addi(_SP_REGNO, _SP_REGNO, 16);
+    for (reg = mask = count = 0; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           mask |= 1 << rn(iregs[reg]);
+           ++count;
+       }
+    }
+    /* One extra register to keep stack 8 bytes aligned */
+    if (count & 1) {
+       for (reg = 4; reg < 10; reg++) {
+           if (!(mask & (1 << reg))) {
+               mask |= 1 << reg;
+               break;
+           }
+       }
+    }
+    if (_jitc->function->need_frame || _jitc->function->need_return)
+       mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+    if (_jitc->function->need_frame)
+       movr(_SP_REGNO, _FP_REGNO);
+    if (!jit_swf_p() && _jitc->function->save_reg_args &&
+       !(_jitc->function->self.call & jit_call_varargs))
+       addi(_SP_REGNO, _SP_REGNO, 16);
+    if (mask) {
+       if (jit_thumb_p())
+           T2_POP(mask);
+       else
+           POP(mask);
+    }
+    if (jit_swf_p() || (_jitc->function->save_reg_args &&
+                       (_jitc->function->self.call & jit_call_varargs)))
+       addi(_SP_REGNO, _SP_REGNO, 16);
     if (jit_thumb_p())
        T1_BX(_LR_REGNO);
     else
     if (jit_thumb_p())
        T1_BX(_LR_REGNO);
     else
@@ -3961,8 +4160,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
      * The -16 is to account for the 4 argument registers
      * always saved, and _jitc->function->vagp is to account
      * for declared arguments. */
      * The -16 is to account for the 4 argument registers
      * always saved, and _jitc->function->vagp is to account
      * for declared arguments. */
-    addi(r0, _FP_REGNO, _jitc->function->self.size -
-        16 + _jitc->function->vagp);
+    addi(r0, _FP_REGNO, jit_selfsize() - 16 + _jitc->function->vagp);
 }
 
 static void
 }
 
 static void
@@ -3989,7 +4187,28 @@ _patch_at(jit_state_t *_jit,
        jit_word_t       w;
     } u;
     u.w = instr;
        jit_word_t       w;
     } u;
     u.w = instr;
-    if (kind == arm_patch_jump) {
+    if (kind == arm_patch_call) {
+       if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
+           code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+           assert((thumb.i & THUMB2_BLI) == THUMB2_BLI);
+           /* skip code to switch from arm to thumb mode */
+           if (jit_exchange_p())
+               d = ((label + 8 - instr) >> 1) - 2;
+           else
+               d = ((label - instr) >> 1) - 2;
+           assert(_s24P(d));
+           thumb.i = THUMB2_BLI | encode_thumb_jump(d);
+           thumb2code(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
+       }
+       else {
+           thumb.i = u.i[0];
+           assert((thumb.i & 0x0f000000) == ARM_BLI);
+           d = ((label - instr) >> 2) - 2;
+           assert(_s24P(d));
+           u.i[0] = (thumb.i & 0xff000000) | (d & 0x00ffffff);
+       }
+    }
+    else if (kind == arm_patch_jump) {
        if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
            code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
            if ((thumb.i & THUMB2_B) == THUMB2_B) {
        if (jit_thumb_p() && (jit_uword_t)instr >= _jitc->thumb) {
            code2thumb(thumb.s[0], thumb.s[1], u.s[0], u.s[1]);
            if ((thumb.i & THUMB2_B) == THUMB2_B) {
index c88f9e3..2aa6a12 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -564,6 +564,8 @@ _swf_ff(jit_state_t *_jit, float(*i0)(float),
        jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
        jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -581,6 +583,8 @@ _swf_dd(jit_state_t *_jit, double (*i0)(double),
        jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
        jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -614,6 +618,8 @@ _swf_fff(jit_state_t *_jit, float (*i0)(float, float),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -635,6 +641,8 @@ _swf_ddd(jit_state_t *_jit, double (*i0)(double, double),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -684,6 +692,8 @@ _swf_fff_(jit_state_t *_jit, float (*i0)(float, float),
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -706,6 +716,8 @@ _swf_rsbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float32_t i0)
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i0;
     movi(_R0_REGNO, data.i);
     if (jit_fpr_p(r1))
     data.f = i0;
     movi(_R0_REGNO, data.i);
     if (jit_fpr_p(r1))
@@ -729,7 +741,8 @@ _swf_ddd_(jit_state_t *_jit, double (*i0)(double, double),
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
-
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -769,6 +782,8 @@ _swf_rsbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i0;
     movi(_R0_REGNO, data.i[0]);
     movi(_R1_REGNO, data.i[1]);
     data.d = i0;
     movi(_R0_REGNO, data.i[0]);
     movi(_R1_REGNO, data.i[1]);
@@ -805,6 +820,8 @@ _swf_iff(jit_state_t *_jit, int (*i0)(float, float),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -823,6 +840,8 @@ _swf_idd(jit_state_t *_jit, int (*i0)(double, double),
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -861,6 +880,8 @@ _swf_iff_(jit_state_t *_jit, int (*i0)(float, float),
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -881,6 +902,8 @@ _swf_idd_(jit_state_t *_jit, int (*i0)(double, double),
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -907,6 +930,8 @@ _swf_iunff(jit_state_t *_jit, int (*i0)(float, float),
 {
     jit_word_t         instr;
     jit_get_reg_args();
 {
     jit_word_t         instr;
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -952,6 +977,8 @@ _swf_iundd(jit_state_t *_jit, int (*i0)(double, double),
 {
     jit_word_t         instr;
     jit_get_reg_args();
 {
     jit_word_t         instr;
     jit_get_reg_args();
+    if (jit_fpr_p(r1) || jit_fpr_p(r2))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1033,6 +1060,8 @@ _swf_iunff_(jit_state_t *_jit, int (*i0)(float, float),
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
        jit_float32_t   f;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     data.f = i1;
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1077,6 +1106,8 @@ _swf_iundd_(jit_state_t *_jit, int (*i0)(double, double),
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
     data.d = i1;
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -1135,6 +1166,8 @@ _swf_bff(jit_state_t *_jit, int (*i0)(float, float), int cc,
 {
     jit_word_t         w, d;
     jit_get_reg_args();
 {
     jit_word_t         w, d;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
@@ -1168,6 +1201,8 @@ _swf_bdd(jit_state_t *_jit, int (*i0)(double, double), int cc,
 {
     jit_word_t         w, d;
     jit_get_reg_args();
 {
     jit_word_t         w, d;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
@@ -1221,6 +1256,8 @@ _swf_bff_(jit_state_t *_jit, int (*i0)(float, float), int cc,
     } data;
     jit_word_t         w, d;
     jit_get_reg_args();
     } data;
     jit_word_t         w, d;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.f = i2;
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     data.f = i2;
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
@@ -1256,6 +1293,8 @@ _swf_bdd_(jit_state_t *_jit, int (*i0)(double, double), int cc,
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.d = i2;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
     data.d = i2;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -1296,6 +1335,8 @@ _swf_bunff(jit_state_t *_jit, int eq,
 {
     jit_word_t         w, d, j0, j1;
     jit_get_reg_args();
 {
     jit_word_t         w, d, j0, j1;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
@@ -1366,6 +1407,8 @@ _swf_bundd(jit_state_t *_jit, int eq,
 {
     jit_word_t         w, d, j0, j1;
     jit_get_reg_args();
 {
     jit_word_t         w, d, j0, j1;
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
@@ -1473,6 +1516,8 @@ _swf_bunff_(jit_state_t *_jit, int eq,
     jit_word_t         w, d, j0, j1;
     data.f = i1;
     jit_get_reg_args();
     jit_word_t         w, d, j0, j1;
     data.f = i1;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
     if (jit_fpr_p(r0))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r0) + 8);
     else
@@ -1541,6 +1586,8 @@ _swf_bundd_(jit_state_t *_jit, int eq,
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
        jit_float64_t   d;
     } data;
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.d = i1;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
     data.d = i1;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p())
@@ -1622,6 +1669,8 @@ static void
 _swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
 _swf_extr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     movr(_R0_REGNO, r1);
     swf_call(__aeabi_i2f, i2f, _R1_REGNO);
     if (jit_fpr_p(r0))
     movr(_R0_REGNO, r1);
     swf_call(__aeabi_i2f, i2f, _R1_REGNO);
     if (jit_fpr_p(r0))
@@ -1635,6 +1684,8 @@ static void
 _swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
 _swf_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     movr(_R0_REGNO, r1);
     swf_call(__aeabi_i2d, i2d, _R2_REGNO);
     if (jit_fpr_p(r0)) {
     movr(_R0_REGNO, r1);
     swf_call(__aeabi_i2d, i2d, _R2_REGNO);
     if (jit_fpr_p(r0)) {
@@ -1656,6 +1707,8 @@ static void
 _swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
 _swf_extr_d_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1680,6 +1733,8 @@ static void
 _swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
 _swf_extr_f_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_get_reg_args();
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -1709,6 +1764,8 @@ _swf_truncr_f_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_word_t         slow_not_nan;
 #endif
     jit_get_reg_args();
     jit_word_t         slow_not_nan;
 #endif
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
     if (jit_fpr_p(r1))
        swf_ldrin(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     else
@@ -1763,6 +1820,8 @@ _swf_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_word_t         slow_not_nan;
 #endif
     jit_get_reg_args();
     jit_word_t         slow_not_nan;
 #endif
     jit_get_reg_args();
+    if (jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        if (!jit_thumb_p() && jit_armv5e_p())
            LDRDIN(_R0_REGNO, _FP_REGNO, swf_off(r1) + 8);
@@ -1823,6 +1882,8 @@ _swf_movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (r0 != r1) {
 {
     jit_int32_t                reg;
     if (r0 != r1) {
+       if (jit_fpr_p(r0) || jit_fpr_p(r1))
+           CHECK_SWF_OFFSET();
        if (jit_fpr_p(r1)) {
            reg = jit_get_reg(jit_class_gpr);
            swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        if (jit_fpr_p(r1)) {
            reg = jit_get_reg(jit_class_gpr);
            swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -1844,6 +1905,8 @@ _swf_movr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (r0 != r1) {
 {
     jit_int32_t                reg;
     if (r0 != r1) {
+       if (jit_fpr_p(r0) || jit_fpr_p(r1))
+           CHECK_SWF_OFFSET();
        if (jit_fpr_p(r1)) {
            if (!jit_thumb_p() && jit_armv5e_p() &&
                (reg = jit_get_reg_pair()) != JIT_NOREG) {
        if (jit_fpr_p(r1)) {
            if (!jit_thumb_p() && jit_armv5e_p() &&
                (reg = jit_get_reg_pair()) != JIT_NOREG) {
@@ -1894,6 +1957,8 @@ _swf_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
        jit_float32_t   f;
     } data;
     jit_int32_t                reg;
        jit_float32_t   f;
     } data;
     jit_int32_t                reg;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.f = i0;
     if (jit_fpr_p(r0)) {
        reg = jit_get_reg(jit_class_gpr);
     data.f = i0;
     if (jit_fpr_p(r0)) {
        reg = jit_get_reg(jit_class_gpr);
@@ -1913,6 +1978,8 @@ _swf_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
        jit_int32_t     i[2];
        jit_float64_t   d;
     } data;
        jit_int32_t     i[2];
        jit_float64_t   d;
     } data;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     data.d = i0;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p() &&
     data.d = i0;
     if (jit_fpr_p(r0)) {
        if (!jit_thumb_p() && jit_armv5e_p() &&
@@ -1941,6 +2008,8 @@ static void
 _swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
 _swf_absr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -1966,6 +2035,8 @@ static void
 _swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
 _swf_absr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
            r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
     if (jit_fpr_p(r1)) {
        if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
            r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
@@ -2013,6 +2084,8 @@ static void
 _swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
 _swf_negr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
     if (jit_fpr_p(r1)) {
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -2038,6 +2111,8 @@ static void
 _swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
 _swf_negr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0) || jit_fpr_p(r1))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r1)) {
        if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
            r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
     if (jit_fpr_p(r1)) {
        if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
            r0 != r1 && (reg = jit_get_reg_pair()) != JIT_NOREG) {
@@ -2170,6 +2245,7 @@ _swf_ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        ldxi_i(rn(reg), r1, 0);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
        reg = jit_get_reg(jit_class_gpr);
        ldxi_i(rn(reg), r1, 0);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
@@ -2184,6 +2260,7 @@ _swf_ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (reg = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDI(rn(reg), r1, 0);
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (reg = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDI(rn(reg), r1, 0);
@@ -2212,6 +2289,7 @@ _swf_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        ldi_i(rn(reg), i0);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
        reg = jit_get_reg(jit_class_gpr);
        ldi_i(rn(reg), i0);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
@@ -2225,6 +2303,8 @@ static void
 _swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                rg0, rg1;
 _swf_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                rg0, rg1;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
        (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
        movi(rn(rg0), i0);
     if (jit_fpr_p(r0) && !jit_thumb_p() && jit_armv5e_p() &&
        (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
        movi(rn(rg0), i0);
@@ -2258,6 +2338,7 @@ _swf_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        ldxr_i(rn(reg), r1, r2);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
        reg = jit_get_reg(jit_class_gpr);
        ldxr_i(rn(reg), r1, r2);
        swf_strin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
@@ -2272,6 +2353,7 @@ _swf_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            LDRD(rn(rg0), r1, r2);
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            LDRD(rn(rg0), r1, r2);
@@ -2307,6 +2389,8 @@ static void
 _swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
 _swf_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
+    if (jit_fpr_p(r0))
+       CHECK_SWF_OFFSET();
     if (jit_fpr_p(r0)) {
        reg = jit_get_reg(jit_class_gpr);
        ldxi_i(rn(reg), r1, i0);
     if (jit_fpr_p(r0)) {
        reg = jit_get_reg(jit_class_gpr);
        ldxi_i(rn(reg), r1, i0);
@@ -2322,6 +2406,7 @@ _swf_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
        if (!jit_thumb_p() && jit_armv5e_p() &&
            ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
@@ -2391,6 +2476,7 @@ _swf_str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        stxi_i(0, r0, rn(reg));
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        stxi_i(0, r0, rn(reg));
@@ -2405,6 +2491,7 @@ _swf_str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (reg = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (reg = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDIN(rn(reg), _FP_REGNO, swf_off(r1) + 8);
@@ -2435,6 +2522,7 @@ _swf_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
        sti_i(i0, rn(reg));
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r0) + 8);
        sti_i(i0, rn(reg));
@@ -2449,6 +2537,7 @@ _swf_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r0)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            rg1 = jit_get_reg(jit_class_gpr);
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            rg1 = jit_get_reg(jit_class_gpr);
@@ -2488,6 +2577,7 @@ _swf_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r2)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r2)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r2) + 8);
        stxr_i(r1, r0, rn(reg));
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r2) + 8);
        stxr_i(r1, r0, rn(reg));
@@ -2502,6 +2592,7 @@ _swf_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r2)) {
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r2)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDIN(rn(rg0), _FP_REGNO, swf_off(r2) + 8);
        if (!jit_thumb_p() && jit_armv5e_p() &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
            LDRDIN(rn(rg0), _FP_REGNO, swf_off(r2) + 8);
@@ -2538,6 +2629,7 @@ _swf_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
 {
     jit_int32_t                reg;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        stxi_i(i0, r0, rn(reg));
        reg = jit_get_reg(jit_class_gpr);
        swf_ldrin(rn(reg), _FP_REGNO, swf_off(r1) + 8);
        stxi_i(i0, r0, rn(reg));
@@ -2552,6 +2644,7 @@ _swf_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r1)) {
 {
     jit_int32_t                rg0, rg1;
     if (jit_fpr_p(r1)) {
+       CHECK_SWF_OFFSET();
        if (!jit_thumb_p() && jit_armv5e_p() &&
            ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
        if (!jit_thumb_p() && jit_armv5e_p() &&
            ((i0 >= 0 && i0 <= 255) || (i0 < 0 && i0 >= -255)) &&
            (rg0 = jit_get_reg_pair()) != JIT_NOREG) {
index 14f085a..faba5a8 100644 (file)
@@ -1,12 +1,13 @@
 
 #if __WORDSIZE == 32
 #if defined(__ARM_PCS_VFP)
 
 #if __WORDSIZE == 32
 #if defined(__ARM_PCS_VFP)
-#define JIT_INSTR_MAX 48
+#define JIT_INSTR_MAX 50
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    2, /* align */
+    14,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     2, /* #name */
     0, /* #note */
     0, /* label */
     2, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     16,        /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     16,        /* va_arg_d */
     8, /* movi */
     8, /* movnr */
     8, /* movzr */
     8, /* movi */
     8, /* movnr */
     8, /* movzr */
+    42,        /* casr */
+    50,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    8, /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
     8, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     8, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     8, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
     8, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    16,        /* movi_d */
+    32,        /* movi_d */
     4, /* ldr_d */
     12,        /* ldi_d */
     8, /* ldxr_d */
     4, /* ldr_d */
     12,        /* ldi_d */
     8, /* ldxr_d */
     12,        /* bler_d */
     28,        /* blei_d */
     12,        /* beqr_d */
     12,        /* bler_d */
     28,        /* blei_d */
     12,        /* beqr_d */
-    28,        /* beqi_d */
+    36,        /* beqi_d */
     12,        /* bger_d */
     28,        /* bgei_d */
     12,        /* bgtr_d */
     12,        /* bger_d */
     28,        /* bgei_d */
     12,        /* bgtr_d */
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    4, /* bswapr_ui */
-    0, /* bswapr_ul */
-    40,        /* casr */
-    48,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    12,        /* cto */
+    8, /* ctz */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 32
 #if !defined(__ARM_PCS_VFP)
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 32
 #if !defined(__ARM_PCS_VFP)
-#define JIT_INSTR_MAX 160
+#define JIT_INSTR_MAX 50
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    2, /* align */
+    18,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     2, /* #name */
     0, /* #note */
     0, /* label */
     2, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     28,        /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     28,        /* va_arg_d */
     8, /* movi */
     8, /* movnr */
     8, /* movzr */
     8, /* movi */
     8, /* movnr */
     8, /* movzr */
+    42,        /* casr */
+    46,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    20,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     20,        /* htonr_us */
     16,        /* htonr_ui */
     0, /* htonr_ul */
     20,        /* htonr_us */
     16,        /* htonr_ui */
     0, /* htonr_ul */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
-    160,       /* epilog */
+    30,        /* epilog */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     28,        /* extr_f */
     22,        /* extr_d_f */
     8, /* movr_f */
     28,        /* extr_f */
     22,        /* extr_d_f */
     8, /* movr_f */
-    12,        /* movi_f */
+    16,        /* movi_f */
     8, /* ldr_f */
     16,        /* ldi_f */
     8, /* ldxr_f */
     8, /* ldr_f */
     16,        /* ldi_f */
     8, /* ldxr_f */
     28,        /* bler_f */
     32,        /* blei_f */
     28,        /* beqr_f */
     28,        /* bler_f */
     32,        /* blei_f */
     28,        /* beqr_f */
-    40,        /* beqi_f */
+    48,        /* beqi_f */
     28,        /* bger_f */
     32,        /* bgei_f */
     28,        /* bgtr_f */
     28,        /* bger_f */
     32,        /* bgei_f */
     28,        /* bgtr_f */
     72,        /* unordi_d */
     20,        /* truncr_d_i */
     0, /* truncr_d_l */
     72,        /* unordi_d */
     20,        /* truncr_d_i */
     0, /* truncr_d_l */
-    28,        /* extr_d */
+    36,        /* extr_d */
     22,        /* extr_f_d */
     16,        /* movr_d */
     22,        /* extr_f_d */
     16,        /* movr_d */
-    20,        /* movi_d */
+    32,        /* movi_d */
     16,        /* ldr_d */
     24,        /* ldi_d */
     20,        /* ldxr_d */
     16,        /* ldr_d */
     24,        /* ldi_d */
     20,        /* ldxr_d */
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     12,        /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    16,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    40,        /* casr */
-    44,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    12,        /* cto */
+    8, /* ctz */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
index 4b146d2..20f80a2 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -1255,7 +1255,7 @@ _vfp_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
     if (jit_fpr_p(r0)) {
        /* float arguments are packed, for others,
         * lightning only address even registers */
     if (jit_fpr_p(r0)) {
        /* float arguments are packed, for others,
         * lightning only address even registers */
-       if (!(r0 & 1) && (r0 - 16) >= 0 &&
+       if (!(r0 & 1) && (r0 - 32) >= 0 &&
            ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
             (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
            VIMM(code, r0);
            ((code = encode_vfp_double(1, 0, u.i, u.i)) != -1 ||
             (code = encode_vfp_double(1, 1, ~u.i, ~u.i)) != -1))
            VIMM(code, r0);
index 6b121bf..64a70f9 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -21,6 +21,8 @@
 #  include <stdio.h>
 #endif
 
 #  include <stdio.h>
 #endif
 
+#define stack_framesize                        48
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 4)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 16)
 #define jit_arg_d_reg_p(i)             ((i) >= 0 && (i) < 15)
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 4)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 16)
 #define jit_arg_d_reg_p(i)             ((i) >= 0 && (i) < 15)
 #define arm_patch_node                 0x80000000
 #define arm_patch_word                 0x40000000
 #define arm_patch_jump                 0x20000000
 #define arm_patch_node                 0x80000000
 #define arm_patch_word                 0x40000000
 #define arm_patch_jump                 0x20000000
-#define arm_patch_load                 0x00000000
+#define arm_patch_load                 0x10000000
+#define arm_patch_call                 0x08000000
 
 #define jit_fpr_p(rn)                  ((rn) > 15)
 
 
 #define jit_fpr_p(rn)                  ((rn) > 15)
 
-#define arg_base()                                                     \
-    (stack_framesize - 16 + (jit_cpu.abi ? 64 : 0))
+#define arg_base()                     (stack_framesize - 16)
 #define arg_offset(n)                                                  \
     ((n) < 4 ? arg_base() + ((n) << 2) : (n))
 
 #define arg_offset(n)                                                  \
     ((n) < 4 ? arg_base() + ((n) << 2) : (n))
 
  * arm mode, what may cause a crash upon return of that function
  * if generating jit for a relative jump.
  */
  * arm mode, what may cause a crash upon return of that function
  * if generating jit for a relative jump.
  */
-#define jit_exchange_p()               1
+#define jit_exchange_p()               jit_cpu.exchange
 
 /* FIXME is it really required to not touch _R10? */
 
 
 /* FIXME is it really required to not touch _R10? */
 
+#define CHECK_REG_ARGS()                                               \
+    do {                                                               \
+       if (!_jitc->function->save_reg_args)                            \
+           _jitc->again = _jitc->function->save_reg_args = 1;          \
+    } while (0)
+
+#define CHECK_SWF_OFFSET()                                             \
+    do {                                                               \
+       if (!_jitc->function->swf_offset) {                             \
+           _jitc->again = _jitc->function->save_reg_args =             \
+               _jitc->function->swf_offset = 1;                        \
+           _jitc->function->self.aoff = -64;                           \
+       }                                                               \
+    } while (0)
+
+#define CHECK_RETURN()                                                 \
+    do {                                                               \
+       if (!_jitc->function->need_frame &&                             \
+           !_jitc->function->need_return)                              \
+           _jitc->again = _jitc->function->need_return = 1;            \
+    } while (0)
+
 /*
  * Types
  */
 /*
  * Types
  */
@@ -59,8 +83,8 @@ typedef jit_pointer_t jit_va_list;
 /*
  * Prototypes
  */
 /*
  * Prototypes
  */
-#define jit_make_arg(node)             _jit_make_arg(_jit,node)
-static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg(node,code)                _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
 #define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
 static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
 #define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
 #define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
 static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
 #define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
@@ -77,8 +101,10 @@ static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t);
 static void _flush_consts(jit_state_t*);
 #define invalidate_consts()            _invalidate_consts(_jit)
 static void _invalidate_consts(jit_state_t*);
 static void _flush_consts(jit_state_t*);
 #define invalidate_consts()            _invalidate_consts(_jit)
 static void _invalidate_consts(jit_state_t*);
-#define patch(instr, node)             _patch(_jit, instr, node)
-static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
+#define patch(instr, node, kind)       _patch(_jit, instr, node, kind)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*,jit_int32_t);
 
 #if defined(__GNUC__)
 /* libgcc */
 
 #if defined(__GNUC__)
 /* libgcc */
@@ -149,6 +175,10 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _R4, _R5, _R6, _R7, _R8, _R9,
+};
+
 /*
  * Implementation
  */
 /*
  * Implementation
  */
@@ -202,6 +232,14 @@ jit_get_cpu(void)
     /* armv6t2 todo (software float and thumb2) */
     if (!jit_cpu.vfp && jit_cpu.thumb)
        jit_cpu.thumb = 0;
     /* armv6t2 todo (software float and thumb2) */
     if (!jit_cpu.vfp && jit_cpu.thumb)
        jit_cpu.thumb = 0;
+    /* FIXME need test environments for the below. For the moment just
+     * be very conservative */
+    /* force generation of code assuming jit and function libraries called
+     * instruction set do not match */
+    jit_cpu.exchange = 1;
+    /* do not generate hardware integer division by default */
+    if (jit_cpu.version == 7)
+       jit_cpu.extend = 0;
 }
 
 void
 }
 
 void
@@ -245,15 +283,10 @@ _jit_prolog(jit_state_t *_jit)
     }
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
     }
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
-    if (jit_cpu.abi)
-       _jitc->function->self.size += 64;
     _jitc->function->self.argi = _jitc->function->self.argf =
     _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.alen = 0;
-    if (jit_swf_p())
-       /* 8 soft float registers */
-       _jitc->function->self.aoff = -64;
-    else
-       _jitc->function->self.aoff = 0;
+       _jitc->function->self.alen = _jitc->function->self.aoff = 0;
+    _jitc->function->swf_offset = _jitc->function->save_reg_args =
+       _jitc->function->need_return = 0;
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
@@ -279,6 +312,9 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    if (jit_swf_p())
+       CHECK_SWF_OFFSET();
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -327,20 +363,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -422,7 +456,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code != jit_code_arg) {
+    if (!(u->code >= jit_code_arg_c && u->code <= jit_code_arg)) {
        if (u->code == jit_code_arg_f) {
            if (jit_cpu.abi)
                return (jit_arg_f_reg_p(u->u.w));
        if (u->code == jit_code_arg_f) {
            if (jit_cpu.abi)
                return (jit_arg_f_reg_p(u->u.w));
@@ -437,7 +471,7 @@ _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 }
 
 static jit_node_t *
 }
 
 static jit_node_t *
-_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
 {
     jit_int32_t                 offset;
     if (jit_arg_reg_p(_jitc->function->self.argi))
 {
     jit_int32_t                 offset;
     if (jit_arg_reg_p(_jitc->function->self.argi))
@@ -447,7 +481,7 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node)
        _jitc->function->self.size += sizeof(jit_word_t);
     }
     if (node == (jit_node_t *)0)
        _jitc->function->self.size += sizeof(jit_word_t);
     }
     if (node == (jit_node_t *)0)
-       node = jit_new_node(jit_code_arg);
+       node = jit_new_node(code);
     else
        link_node(node);
     node->u.w = offset;
     else
        link_node(node);
     node->u.w = offset;
@@ -534,9 +568,10 @@ _jit_ellipsis(jit_state_t *_jit)
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
        _jitc->function->self.call |= jit_call_varargs;
+       CHECK_REG_ARGS();
        if (jit_cpu.abi &&  _jitc->function->self.argf)
            rewind_prolog();
        if (jit_cpu.abi &&  _jitc->function->self.argf)
            rewind_prolog();
-       /* First 4 stack addresses are always spilled r0-r3 */
+       /* First 4 stack addresses need to be spilled r0-r3 */
        if (jit_arg_reg_p(_jitc->function->self.argi))
            _jitc->function->vagp = _jitc->function->self.argi * 4;
        else
        if (jit_arg_reg_p(_jitc->function->self.argi))
            _jitc->function->vagp = _jitc->function->self.argi * 4;
        else
@@ -559,16 +594,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    return (jit_make_arg((jit_node_t*)0));
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+    return (jit_make_arg((jit_node_t*)0, code));
 }
 
 jit_node_t *
 _jit_arg_f(jit_state_t *_jit)
 {
     assert(_jitc->function);
 }
 
 jit_node_t *
 _jit_arg_f(jit_state_t *_jit)
 {
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
     return (jit_make_arg_f((jit_node_t*)0));
 }
 
     return (jit_make_arg_f((jit_node_t*)0));
 }
 
@@ -576,103 +616,141 @@ jit_node_t *
 _jit_arg_d(jit_state_t *_jit)
 {
     assert(_jitc->function);
 _jit_arg_d(jit_state_t *_jit)
 {
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
     return (jit_make_arg_d((jit_node_t*)0));
 }
 
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     return (jit_make_arg_d((jit_node_t*)0));
 }
 
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_swf_p())
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_swf_p())
-       jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_c(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+       node = jit_ldxi_c(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_swf_p())
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_swf_p())
-       jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_uc(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+       node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_swf_p())
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_swf_p())
-       jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_s(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+       node = jit_ldxi_s(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_swf_p())
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_swf_p())
-       jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_us(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
     else if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+       node = jit_ldxi_us(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    jit_node_t         *node = NULL;
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_swf_p())
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_swf_p())
-       jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_i(u, JIT_FP, arg_offset(v->u.w));
     else if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     else
     else if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+       node = jit_ldxi_i(u, JIT_FP, v->u.w);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    jit_node_t         *node = NULL;
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_swf_p())
     if (jit_swf_p())
-       jit_stxi(arg_offset(v->u.w), JIT_FP, u);
+       node = jit_stxi(arg_offset(v->u.w), JIT_FP, u);
     else if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
     else if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
-       jit_stxi(v->u.w, JIT_FP, u);
+       node = jit_stxi(v->u.w, JIT_FP, u);
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    jit_int32_t                 regno;
+    jit_node_t         *node = NULL;
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
     if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(arg_offset(v->u.w), JIT_FP, regno);
+       node = jit_stxi(arg_offset(v->u.w), JIT_FP, regno);
        jit_unget_reg(regno);
     }
     else if (jit_arg_reg_p(v->u.w))
        jit_unget_reg(regno);
     }
     else if (jit_arg_reg_p(v->u.w))
@@ -680,30 +758,41 @@ _jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+       node = jit_stxi(v->u.w, JIT_FP, regno);
        jit_unget_reg(regno);
     }
        jit_unget_reg(regno);
     }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(getarg_f, u, v);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_f(u, JIT_FA0 - v->u.w);
        else
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(getarg_f, u, v);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_f(u, JIT_FA0 - v->u.w);
        else
-           jit_ldxi_f(u, JIT_FP, v->u.w);
+           node = jit_ldxi_f(u, JIT_FP, v->u.w);
     }
     else if (jit_swf_p())
     }
     else if (jit_swf_p())
-       jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_f(u, JIT_FP, arg_offset(v->u.w));
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_w_f(u, JIT_RA0 - v->u.w);
        else
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_w_f(u, JIT_RA0 - v->u.w);
        else
-           jit_ldxi_f(u, JIT_FP, v->u.w);
+           node = jit_ldxi_f(u, JIT_FP, v->u.w);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -711,21 +800,27 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
     if (jit_cpu.abi) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_f(JIT_FA0 - v->u.w, u);
        else
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
     if (jit_cpu.abi) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_f(JIT_FA0 - v->u.w, u);
        else
-           jit_stxi_f(v->u.w, JIT_FP, u);
+           node = jit_stxi_f(v->u.w, JIT_FP, u);
     }
     else if (jit_swf_p())
     }
     else if (jit_swf_p())
-       jit_stxi_f(arg_offset(v->u.w), JIT_FP, u);
+       node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, u);
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_f_w(JIT_RA0 - v->u.w, u);
        else
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_f_w(JIT_RA0 - v->u.w, u);
        else
-           jit_stxi_f(v->u.w, JIT_FP, u);
+           node = jit_stxi_f(v->u.w, JIT_FP, u);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -733,7 +828,8 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
 {
 void
 _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
 {
-    jit_int32_t                regno;
+    jit_int32_t                 regno;
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_cpu.abi) {
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_cpu.abi) {
@@ -742,14 +838,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
        else {
            regno = jit_get_reg(jit_class_fpr);
            jit_movi_f(regno, u);
        else {
            regno = jit_get_reg(jit_class_fpr);
            jit_movi_f(regno, u);
-           jit_stxi_f(v->u.w, JIT_FP, regno);
+           node = jit_stxi_f(v->u.w, JIT_FP, regno);
            jit_unget_reg(regno);
        }
     }
     else if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
            jit_unget_reg(regno);
        }
     }
     else if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno);
+       node = jit_stxi_f(arg_offset(v->u.w), JIT_FP, regno);
        jit_unget_reg(regno);
     }
     else {
        jit_unget_reg(regno);
     }
     else {
@@ -758,30 +854,41 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
        if (jit_arg_reg_p(v->u.w))
            jit_movr_f_w(JIT_RA0 - v->u.w, regno);
        else
        if (jit_arg_reg_p(v->u.w))
            jit_movr_f_w(JIT_RA0 - v->u.w, regno);
        else
-           jit_stxi_f(v->u.w, JIT_FP, regno);
+           node = jit_stxi_f(v->u.w, JIT_FP, regno);
        jit_unget_reg(regno);
     }
        jit_unget_reg(regno);
     }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_d(u, JIT_FA0 - v->u.w);
        else
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(getarg_d, u, v);
     if (jit_cpu.abi && !(_jitc->function->self.call & jit_call_varargs)) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_d(u, JIT_FA0 - v->u.w);
        else
-           jit_ldxi_d(u, JIT_FP, v->u.w);
+           node = jit_ldxi_d(u, JIT_FP, v->u.w);
     }
     else if (jit_swf_p())
     }
     else if (jit_swf_p())
-       jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w));
+       node = jit_ldxi_d(u, JIT_FP, arg_offset(v->u.w));
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_ww_d(u, JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1));
        else
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_ww_d(u, JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1));
        else
-           jit_ldxi_d(u, JIT_FP, v->u.w);
+           node = jit_ldxi_d(u, JIT_FP, v->u.w);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -789,21 +896,27 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
     if (jit_cpu.abi) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_d(JIT_FA0 - v->u.w, u);
        else
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
     if (jit_cpu.abi) {
        if (jit_arg_f_reg_p(v->u.w))
            jit_movr_d(JIT_FA0 - v->u.w, u);
        else
-           jit_stxi_d(v->u.w, JIT_FP, u);
+           node = jit_stxi_d(v->u.w, JIT_FP, u);
     }
     else if (jit_swf_p())
     }
     else if (jit_swf_p())
-       jit_stxi_d(arg_offset(v->u.w), JIT_FP, u);
+       node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, u);
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), u);
        else
     else {
        if (jit_arg_reg_p(v->u.w))
            jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), u);
        else
-           jit_stxi_d(v->u.w, JIT_FP, u);
+           node = jit_stxi_d(v->u.w, JIT_FP, u);
+    }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -811,7 +924,8 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 {
 void
 _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 {
-    jit_int32_t                regno;
+    jit_int32_t                 regno;
+    jit_node_t         *node = NULL;
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_cpu.abi) {
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_cpu.abi) {
@@ -820,14 +934,14 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        else {
            regno = jit_get_reg(jit_class_fpr);
            jit_movi_d(regno, u);
        else {
            regno = jit_get_reg(jit_class_fpr);
            jit_movi_d(regno, u);
-           jit_stxi_d(v->u.w, JIT_FP, regno);
+           node = jit_stxi_d(v->u.w, JIT_FP, regno);
            jit_unget_reg(regno);
        }
     }
     else if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
            jit_unget_reg(regno);
        }
     }
     else if (jit_swf_p()) {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno);
+       node = jit_stxi_d(arg_offset(v->u.w), JIT_FP, regno);
        jit_unget_reg(regno);
     }
     else {
        jit_unget_reg(regno);
     }
     else {
@@ -836,17 +950,22 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        if (jit_arg_reg_p(v->u.w))
            jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), regno);
        else
        if (jit_arg_reg_p(v->u.w))
            jit_movr_d_ww(JIT_RA0 - v->u.w, JIT_RA0 - (v->u.w + 1), regno);
        else
-           jit_stxi_d(v->u.w, JIT_FP, regno);
+           node = jit_stxi_d(v->u.w, JIT_FP, regno);
        jit_unget_reg(regno);
     }
        jit_unget_reg(regno);
     }
+    if (node) {
+       CHECK_REG_ARGS();
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
@@ -860,11 +979,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -1148,6 +1267,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1293,7 +1413,7 @@ _emit_code(jit_state_t *_jit)
                else {                                                  \
                    word = name##r##type(_jit->pc.w,                    \
                                         rn(node->v.w), rn(node->w.w)); \
                else {                                                  \
                    word = name##r##type(_jit->pc.w,                    \
                                         rn(node->v.w), rn(node->w.w)); \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #define case_bvv(name, type)                                           \
                }                                                       \
                break
 #define case_bvv(name, type)                                           \
@@ -1318,7 +1438,7 @@ _emit_code(jit_state_t *_jit)
                        word = vfp_##name##r##type(_jit->pc.w,          \
                                                   rn(node->v.w),       \
                                                   rn(node->w.w));      \
                        word = vfp_##name##r##type(_jit->pc.w,          \
                                                   rn(node->v.w),       \
                                                   rn(node->w.w));      \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #define case_brw(name, type)                                           \
                }                                                       \
                break
 #define case_brw(name, type)                                           \
@@ -1332,7 +1452,7 @@ _emit_code(jit_state_t *_jit)
                else {                                                  \
                    word = name##i##type(_jit->pc.w,                    \
                                         rn(node->v.w), node->w.w);     \
                else {                                                  \
                    word = name##i##type(_jit->pc.w,                    \
                                         rn(node->v.w), node->w.w);     \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break;
 #define case_bvf(name)                                                 \
                }                                                       \
                break;
 #define case_bvf(name)                                                 \
@@ -1357,7 +1477,7 @@ _emit_code(jit_state_t *_jit)
                        word = vfp_##name##i_f(_jit->pc.w,              \
                                               rn(node->v.w),           \
                                               node->w.f);              \
                        word = vfp_##name##i_f(_jit->pc.w,              \
                                               rn(node->v.w),           \
                                               node->w.f);              \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #define case_bvd(name)                                                 \
                }                                                       \
                break
 #define case_bvd(name)                                                 \
@@ -1382,7 +1502,7 @@ _emit_code(jit_state_t *_jit)
                        word = vfp_##name##i_d(_jit->pc.w,              \
                                               rn(node->v.w),           \
                                               node->w.d);              \
                        word = vfp_##name##i_d(_jit->pc.w,              \
                                               rn(node->v.w),           \
                                               node->w.d);              \
-                   patch(word, node);                                  \
+                   patch(word, node, arm_patch_jump);                  \
                }                                                       \
                break
 #if DEVEL_DISASSEMBLER
                }                                                       \
                break
 #if DEVEL_DISASSEMBLER
@@ -1405,6 +1525,12 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               if (jit_thumb_p())
+                   nop((node->u.w + 1) & ~1);
+               else
+                   nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                if (must_align_p(node->next))
                    nop(2);
            case jit_code_note:         case jit_code_name:
                if (must_align_p(node->next))
                    nop(2);
@@ -1456,6 +1582,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1526,7 +1656,7 @@ _emit_code(jit_state_t *_jit)
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
                        word = movi_p(rn(node->u.w), temp->u.w);
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
                        word = movi_p(rn(node->u.w), temp->u.w);
-                       patch(word, node);
+                       patch(word, node, arm_patch_word);
                    }
                }
                else
                    }
                }
                else
@@ -1765,6 +1895,7 @@ _emit_code(jit_state_t *_jit)
                case_bvv(bunord, _d);
                case_bvd(bunord);
            case jit_code_jmpr:
                case_bvv(bunord, _d);
                case_bvd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                flush_consts();
                break;
                jmpr(rn(node->u.w));
                flush_consts();
                break;
@@ -1776,36 +1907,59 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w, 1);
-                       patch(word, node);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_thumb_p())      word >>= 1;
+                       else                    word >>= 2;
+                       word -= 2;
+                       value = _s24P(word);
+                       word = jmpi_p(_jit->pc.w, value);
+                       patch(word, node, value ?
+                             arm_patch_jump : arm_patch_word);
                    }
                }
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
                    jmpi(node->u.w);
+               }
                flush_consts();
                break;
            case jit_code_callr:
                flush_consts();
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                if (node->flag & jit_flag_node) {
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                if (node->flag & jit_flag_node) {
+                   CHECK_RETURN();
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       calli(temp->u.w);
+                       calli(temp->u.w, 0);
                    else {
                    else {
-                       word = calli_p(_jit->pc.w);
-                       patch(word, node);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_exchange_p())
+                           word -= 8;
+                       if (jit_thumb_p())      word >>= 1;
+                       else                    word >>= 2;
+                       word -= 2;
+                       value = _s24P(word);
+                       word = calli_p(_jit->pc.w, value);
+                       patch(word, node, value ?
+                             arm_patch_call : arm_patch_word);
                    }
                }
                    }
                }
-               else
-                   calli(node->u.w);
+               else {
+                   jit_check_frame();
+                   calli(node->u.w, jit_exchange_p());
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1819,6 +1973,8 @@ _emit_code(jit_state_t *_jit)
 #endif
            restart_function:
                _jitc->again = 0;
 #endif
            restart_function:
                _jitc->again = 0;
+               compute_framesize();
+               patch_alist(0);
                prolog(node);
                break;
            case jit_code_epilog:
                prolog(node);
                break;
            case jit_code_epilog:
@@ -1833,6 +1989,21 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   undo.func.need_return = _jitc->function->need_return;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* swf_offset and check_reg_args must also not be undone */
+                   undo.func.swf_offset = _jitc->function->swf_offset;
+                   undo.func.save_reg_args = _jitc->function->save_reg_args;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1845,6 +2016,7 @@ _emit_code(jit_state_t *_jit)
                    if (_jitc->data_info.ptr)
                        _jitc->data_info.offset = undo.info_offset;
 #endif
                    if (_jitc->data_info.ptr)
                        _jitc->data_info.offset = undo.info_offset;
 #endif
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
                    goto restart_function;
                }
                /* remember label is defined */
@@ -1907,21 +2079,34 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case  jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1984,7 +2169,10 @@ _emit_code(jit_state_t *_jit)
        node = _jitc->patches.ptr[offset].node;
        word = _jitc->patches.ptr[offset].inst;
        if (!jit_thumb_p() &&
        node = _jitc->patches.ptr[offset].node;
        word = _jitc->patches.ptr[offset].inst;
        if (!jit_thumb_p() &&
-           (node->code == jit_code_movi || node->code == jit_code_calli)) {
+           (node->code == jit_code_movi ||
+            (node->code == jit_code_calli &&
+             (_jitc->patches.ptr[offset].kind & ~arm_patch_node) ==
+             arm_patch_word))) {
            /* calculate where to patch word */
            value = *(jit_int32_t *)word;
            assert((value & 0x0f700000) == ARM_LDRI);
            /* calculate where to patch word */
            value = *(jit_int32_t *)word;
            assert((value & 0x0f700000) == ARM_LDRI);
@@ -2254,24 +2442,31 @@ _invalidate_consts(jit_state_t *_jit)
 }
 
 static void
 }
 
 static void
-_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    _jitc->framesize = sizeof(jit_word_t) * 2; /* lr+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    if (_jitc->function->save_reg_args)
+       _jitc->framesize += 16;
+
+    /* Make sure functions called have a 8 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 7) & -8;
+}
+
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node, jit_int32_t kind)
 {
     jit_int32_t                 flag;
 {
     jit_int32_t                 flag;
-    jit_int32_t                 kind;
 
     assert(node->flag & jit_flag_node);
 
     assert(node->flag & jit_flag_node);
-    if (node->code == jit_code_movi) {
+    if (node->code == jit_code_movi)
        flag = node->v.n->flag;
        flag = node->v.n->flag;
-       kind = arm_patch_word;
-    }
-    else {
+    else
        flag = node->u.n->flag;
        flag = node->u.n->flag;
-       if (node->code == jit_code_calli ||
-           (node->code == jit_code_jmpi && !(node->flag & jit_flag_node)))
-           kind = arm_patch_word;
-       else
-           kind = arm_patch_jump;
-    }
     assert(!(flag & jit_flag_patch));
     kind |= arm_patch_node;
     if (_jitc->patches.offset >= _jitc->patches.length) {
     assert(!(flag & jit_flag_patch));
     kind |= arm_patch_node;
     if (_jitc->patches.offset >= _jitc->patches.length) {
index 9ad84f1..a6981fa 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -54,7 +54,7 @@ static FILE                    *disasm_stream;
 #endif
 
 #if BINUTILS_2_38
 #endif
 
 #if BINUTILS_2_38
-static int fprintf_styled(void *, enum disassembler_style, const char* fmt, ...)
+static int fprintf_styled(void * stream, enum disassembler_style style, const char* fmt, ...)
 {
   va_list args;
   int r;
 {
   va_list args;
   int r;
@@ -256,7 +256,7 @@ disasm_print_address(bfd_vma addr, struct disassemble_info *info)
     int                         line;
     char                buffer[address_buffer_length];
 
     int                         line;
     char                buffer[address_buffer_length];
 
-    sprintf(buffer, address_buffer_format, (long long)addr);
+    sprintf(buffer, address_buffer_format, addr);
     (*info->fprintf_func)(info->stream, "0x%s", buffer);
 
 #  define _jit                         disasm_jit
     (*info->fprintf_func)(info->stream, "0x%s", buffer);
 
 #  define _jit                         disasm_jit
@@ -406,7 +406,7 @@ _disassemble(jit_state_t *_jit, jit_pointer_t code, jit_int32_t length)
            old_line = line;
        }
 
            old_line = line;
        }
 
-       bytes = sprintf(buffer, address_buffer_format, (long long)pc);
+       bytes = sprintf(buffer, address_buffer_format, pc);
        (*disasm_info.fprintf_func)(disasm_stream, "%*c0x%s\t",
                                    16 - bytes, ' ', buffer);
        pc += (*disasm_print)(pc, &disasm_info);
        (*disasm_info.fprintf_func)(disasm_stream, "%*c0x%s\t",
                                    16 - bytes, ' ', buffer);
        pc += (*disasm_print)(pc, &disasm_info);
index 8912691..2f7f214 100644 (file)
@@ -12,6 +12,55 @@ static void _fallback_calli(jit_state_t*, jit_word_t, jit_word_t);
 #define fallback_casx(r0,r1,r2,r3,im)  _fallback_casx(_jit,r0,r1,r2,r3,im)
 static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
                           jit_int32_t, jit_int32_t, jit_word_t);
 #define fallback_casx(r0,r1,r2,r3,im)  _fallback_casx(_jit,r0,r1,r2,r3,im)
 static void _fallback_casx(jit_state_t *, jit_int32_t, jit_int32_t,
                           jit_int32_t, jit_int32_t, jit_word_t);
+#define fallback_clo(r0,r1)            _fallback_clo(_jit,r0,r1)
+static void _fallback_clo(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_clz(r0,r1)            _fallback_clz(_jit,r0,r1)
+static void _fallback_clz(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_cto(r0,r1)            _fallback_cto(_jit,r0,r1)
+static void _fallback_cto(jit_state_t*, jit_int32_t, jit_int32_t);
+#define fallback_ctz(r0,r1)            _fallback_ctz(_jit,r0,r1)
+static void _fallback_ctz(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if defined(__ia64__)
+#    define fallback_patch_jmpi(inst,lbl)                              \
+    do {                                                               \
+       sync();                                                         \
+       patch_at(jit_code_jmpi, inst, lbl);                             \
+    } while (0)
+#  else
+#    define fallback_patch_jmpi(inst,lbl) fallback_patch_at(inst,lbl)
+#  endif
+#  if defined(__arm__)
+#    define fallback_patch_at(inst,lbl)        patch_at(arm_patch_jump,inst,lbl)
+#  elif defined(__ia64__)
+#    define fallback_patch_at(inst,lbl)                                        \
+    do {                                                               \
+       sync();                                                         \
+       patch_at(jit_code_bnei, inst, lbl);                             \
+    } while (0);
+#  else
+#    define fallback_patch_at(inst,lbl)        patch_at(inst,lbl)
+#  endif
+#  if defined(__mips__)
+#    define fallback_jmpi(i0)          jmpi(i0,1)
+#  elif defined(__arm__)
+#    define fallback_jmpi(i0)          jmpi_p(i0,1)
+#  elif defined(__s390__) || defined(__s390x__)
+#    define fallback_jmpi(i0)          jmpi(i0,1)
+#  else
+#    define fallback_jmpi(i0)          jmpi(i0)
+#  endif
+#  if defined(__mips__)
+#    define fallback_bnei(i0,r0,i1)    bnei(i0,r0,i1)
+#  elif defined(__s390__) || defined(__s390x__)
+#    define fallback_bnei(i0,r0,i1)    bnei_p(i0,r0,i1)
+#  else
+#    define fallback_bnei(i0,r0,i1)    bnei(i0,r0,i1)
+#  endif
+#  if defined(__s390__) || defined(__s390x__)
+#    define fallback_bmsr(i0,r0,r1)    bmsr_p(i0,r0,r1)
+#  else
+#    define fallback_bmsr(i0,r0,r1)    bmsr(i0,r0,r1)
+#  endif
 #endif
 
 #if CODE
 #endif
 
 #if CODE
@@ -96,16 +145,20 @@ _fallback_calli(jit_state_t *_jit, jit_word_t i0, jit_word_t i1)
 {
 #  if defined(__arm__)
     movi(rn(_R0), i1);
 {
 #  if defined(__arm__)
     movi(rn(_R0), i1);
-#  elif defined(__ia64__)
-    /* avoid confusion with pushargi patching */
-    if (i1 >= -2097152 && i1 <= 2097151)
-       MOVI(_jitc->rout, i1);
-    else
-       MOVL(_jitc->rout, i1);
 #  elif defined(__hppa__)
     movi(_R26_REGNO, i1);
 #  elif defined(__hppa__)
     movi(_R26_REGNO, i1);
-#endif
+#  endif
+#  if defined(__arm__)
+    calli(i0, jit_exchange_p());
+#  elif defined(__mips__)
+    calli(i0, 0);
+#  elif defined(__powerpc__) && _CALL_SYSV
+    calli(i0, 0);
+#  elif defined(__s390__) || defined(__s390x__)
+    calli(i0, 0);
+#  else
     calli(i0);
     calli(i0);
+#  endif
 }
 
 static void
 }
 
 static void
@@ -128,7 +181,7 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     fallback_load(r2);
     eqr(r0, r0, r2);
     fallback_save(r0);
     fallback_load(r2);
     eqr(r0, r0, r2);
     fallback_save(r0);
-    jump = bnei(_jit->pc.w, r0, 1);
+    jump = fallback_bnei(_jit->pc.w, r0, 1);
     fallback_load(r3);
 #  if __WORDSIZE == 32
     str_i(r1, r3);
     fallback_load(r3);
 #  if __WORDSIZE == 32
     str_i(r1, r3);
@@ -136,21 +189,144 @@ _fallback_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     str_l(r1, r3);
 #  endif
     /* done: */
     str_l(r1, r3);
 #  endif
     /* done: */
-#  if defined(__ia64__)
-    sync();
-# endif
     done = _jit->pc.w;
     fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
     fallback_load(r0);
     done = _jit->pc.w;
     fallback_calli((jit_word_t)pthread_mutex_unlock, (jit_word_t)&mutex);
     fallback_load(r0);
-#  if defined(__arm__)
-    patch_at(arm_patch_jump, jump, done);
-#  elif defined(__ia64__)
-    patch_at(jit_code_bnei, jump, done);
-#  else
-    patch_at(jump, done);
-#  endif
+    fallback_patch_at(jump, done);
     fallback_load_regs(r0);
     if (iscasi)
        jit_unget_reg(r1_reg);
 }
     fallback_load_regs(r0);
     if (iscasi)
        jit_unget_reg(r1_reg);
 }
+
+static void
+_fallback_clo(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         clz, done;
+    comr(r0, r1);
+    clz = fallback_bnei(_jit->pc.w, r0, 0);
+    movi(r0, __WORDSIZE);
+    done = fallback_jmpi(_jit->pc.w);
+    fallback_patch_at(clz, _jit->pc.w);
+    fallback_clz(r0, r0);
+    fallback_patch_jmpi(done, _jit->pc.w);
+}
+
+static void
+_fallback_clz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                r1_reg, r2, r2_reg;
+    jit_word_t         clz, l32, l16, l8, l4, l2, l1;
+    l32 = fallback_bnei(_jit->pc.w, r1, 0);
+    movi(r0, __WORDSIZE);
+    clz = fallback_jmpi(_jit->pc.w);
+    fallback_patch_at(l32, _jit->pc.w);
+    r2_reg = jit_get_reg(jit_class_gpr);
+    r2 = rn(r2_reg);
+    r1_reg = jit_get_reg(jit_class_gpr);
+    movr(rn(r1_reg), r1);
+    r1 = rn(r1_reg);
+    movi(r0, 0);
+#  if __WORDSIZE == 64
+    movi(r2, 0xffffffff00000000UL);
+    l32 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 32);
+    addi(r0, r0, 32);
+    fallback_patch_at(l32, _jit->pc.w);
+    lshi(r2, r2, 16);
+#  else
+    movi(r2, 0xffff0000UL);
+#  endif
+    l16 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 16);
+    addi(r0, r0, 16);
+    fallback_patch_at(l16, _jit->pc.w);
+    lshi(r2, r2, 8);
+    l8 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 8);
+    addi(r0, r0, 8);
+    fallback_patch_at(l8, _jit->pc.w);
+    lshi(r2, r2, 4);
+    l4 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 4);
+    addi(r0, r0, 4);
+    fallback_patch_at(l4, _jit->pc.w);
+    lshi(r2, r2, 2);
+    l2 = fallback_bmsr(_jit->pc.w, r1, r2);
+    lshi(r1, r1, 2);
+    addi(r0, r0, 2);
+    fallback_patch_at(l2, _jit->pc.w);
+    lshi(r2, r2, 1);
+    l1 = fallback_bmsr(_jit->pc.w, r1, r2);
+    addi(r0, r0, 1);
+    fallback_patch_at(l1, _jit->pc.w);
+    fallback_patch_jmpi(clz, _jit->pc.w);
+    jit_unget_reg(r2_reg);
+    jit_unget_reg(r1_reg);
+}
+
+static void
+_fallback_cto(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         ctz, done;
+    comr(r0, r1);
+    ctz = fallback_bnei(_jit->pc.w, r0, 0);
+    movi(r0, __WORDSIZE);
+    done = fallback_jmpi(_jit->pc.w);
+    fallback_patch_at(ctz, _jit->pc.w);
+    fallback_ctz(r0, r0);
+    fallback_patch_jmpi(done, _jit->pc.w);
+}
+
+static void
+_fallback_ctz(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                r1_reg, r2, r2_reg;
+    jit_word_t         ctz, l32, l16, l8, l4, l2, l1;
+    l32 = fallback_bnei(_jit->pc.w, r1, 0);
+    movi(r0, __WORDSIZE);
+    ctz = fallback_jmpi(_jit->pc.w);
+    fallback_patch_at(l32, _jit->pc.w);
+    r2_reg = jit_get_reg(jit_class_gpr);
+    r2 = rn(r2_reg);
+    r1_reg = jit_get_reg(jit_class_gpr);
+    movr(rn(r1_reg), r1);
+    r1 = rn(r1_reg);
+    movi(r0, 0);
+#  if __WORDSIZE == 64
+    movi(r2, 0xffffffffUL);
+    l32 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 32);
+    addi(r0, r0, 32);
+    fallback_patch_at(l32, _jit->pc.w);
+    rshi(r2, r2, 16);
+#  else
+    movi(r2, 0xffffUL);
+#  endif
+    l16 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 16);
+    addi(r0, r0, 16);
+    fallback_patch_at(l16, _jit->pc.w);
+    rshi(r2, r2, 8);
+    l8 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 8);
+    addi(r0, r0, 8);
+    fallback_patch_at(l8, _jit->pc.w);
+    rshi(r2, r2, 4);
+    l4 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 4);
+    addi(r0, r0, 4);
+    fallback_patch_at(l4, _jit->pc.w);
+    rshi(r2, r2, 2);
+    l2 = fallback_bmsr(_jit->pc.w, r1, r2);
+    rshi_u(r1, r1, 2);
+    addi(r0, r0, 2);
+    fallback_patch_at(l2, _jit->pc.w);
+    rshi(r2, r2, 1);
+    l1 = fallback_bmsr(_jit->pc.w, r1, r2);
+    addi(r0, r0, 1);
+    fallback_patch_at(l1, _jit->pc.w);
+    fallback_patch_jmpi(ctz, _jit->pc.w);
+    jit_unget_reg(r2_reg);
+    jit_unget_reg(r1_reg);
+}
 #endif
 #endif
index 013460c..ebb01fd 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -648,6 +648,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #define movi_p(r0,i0)          _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #define movi_p(r0,i0)          _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#  define bswapr_us(r0, r1)            _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0, r1)            _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
@@ -663,8 +667,6 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define extr_uc(r0,r1)         EXTRWR_U(r1,31,8,r0)
 #define extr_s(r0,r1)          EXTRWR(r1,31,16,r0)
 #define extr_us(r0,r1)         EXTRWR_U(r1,31,16,r0)
 #define extr_uc(r0,r1)         EXTRWR_U(r1,31,8,r0)
 #define extr_s(r0,r1)          EXTRWR(r1,31,16,r0)
 #define extr_us(r0,r1)         EXTRWR_U(r1,31,16,r0)
-#define bswapr_us(r0,r1)       generic_bswapr_us(_jit,r0,r1)
-#define bswapr_ui(r0,r1)       generic_bswapr_ui(_jit,r0,r1)
 #define addr(r0,r1,r2)         ADD(r1,r2,r0)
 #define addi(r0,r1,i0)         _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #define addr(r0,r1,r2)         ADD(r1,r2,r0)
 #define addi(r0,r1,i0)         _addi(_jit,r0,r1,i0)
 static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
@@ -912,7 +914,7 @@ static jit_word_t _bxsubi_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #define jmpr(r0)               _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define jmpi(i0)               _jmpi(_jit,i0)
 #define jmpr(r0)               _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define jmpi(i0)               _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #define jmpi_p(i0)             _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #define callr(r0)              _callr(_jit,r0)
 #define jmpi_p(i0)             _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #define callr(r0)              _callr(_jit,r0)
@@ -1638,6 +1640,42 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (r0 == r1) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       EXTRWR_U(rn(reg), 23, 8, r0);
+       DEPWR(rn(reg), 23, 8, r0);
+       jit_unget_reg(reg);
+    }
+    else {
+       EXTRWR_U(r1, 23, 8, r0);
+       DEPWR(r1, 23, 8, r0);
+    }
+}
+
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (r0 == r1) {
+       reg = jit_get_reg(jit_class_gpr);
+       movr(rn(reg), r1);
+       SHRPWI(rn(reg), rn(reg), 16, r0);
+       DEPWR(r0, 15, 8, r0);
+       SHRPWI(rn(reg), r0, 8, r0);
+       jit_unget_reg(reg);
+    }
+    else {
+       SHRPWI(r1, r1, 16, r0);
+       DEPWR(r0, 15, 8, r0);
+       SHRPWI(r1, r0, 8, r0);
+    }
+}
+
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2632,17 +2670,19 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     BV_N(_R0_REGNO, r0);
 }
 
     BV_N(_R0_REGNO, r0);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
-    w = ((i0 - _jit->pc.w) >> 2) - 2;
-    if (w >= -32768 && w <= 32767)
-       B_N(w, _R0_REGNO);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = ((i0 - w) >> 2) - 2;
+    if (d >= -32768 && d <= 32767)
+       B_N(d, _R0_REGNO);
     else {
     else {
-       movi(_R1_REGNO, w);
+       movi(_R1_REGNO, d);
        BV_N(_R0_REGNO, _R1_REGNO);
     }
        BV_N(_R0_REGNO, _R1_REGNO);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
index 6b2838d..ed141a7 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index 33ac908..e41f89c 100644 (file)
@@ -3,9 +3,10 @@
 #define JIT_INSTR_MAX 196
     0, /* data */
     0, /* live */
 #define JIT_INSTR_MAX 196
     0, /* data */
     0, /* live */
-    0, /* align */
+    28,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    0,  /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     20,        /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     20,        /* va_arg_d */
     8, /* movi */
     12,        /* movnr */
     12,        /* movzr */
     8, /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    88,        /* casr */
+    96,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    12,        /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     40,        /* callr */
     44,        /* calli */
     0, /* prepare */
     40,        /* callr */
     44,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    28,        /* bswapr_us */
-    68,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    88,        /* casr */
-    96,        /* casi */
+    160,       /* clo */
+    140,       /* clz */
+    164,       /* cto */
+    144,       /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index 2c826d8..d3c5ef7 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -245,18 +245,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
+    jit_code_inc_synth_w(code, u);
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -310,7 +310,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    assert(u->code == jit_code_arg ||
+    assert((u->code >= jit_code_arg_c && u->code <= jit_code_arg) ||
           u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_reg_p(u->u.w));
 }
           u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_reg_p(u->u.w));
 }
@@ -343,17 +343,21 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     _jitc->function->self.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else
        offset = _jitc->function->self.size;
     _jitc->function->self.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else
        offset = _jitc->function->self.size;
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -406,7 +410,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (v->u.w >= 0)
        jit_extr_c(u, _R26 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (v->u.w >= 0)
        jit_extr_c(u, _R26 - v->u.w);
@@ -418,7 +422,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (v->u.w >= 0)
        jit_extr_uc(u, _R26 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (v->u.w >= 0)
        jit_extr_uc(u, _R26 - v->u.w);
@@ -430,7 +434,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (v->u.w >= 0)
        jit_extr_s(u, _R26 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (v->u.w >= 0)
        jit_extr_s(u, _R26 - v->u.w);
@@ -442,7 +446,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (v->u.w >= 0)
        jit_extr_us(u, _R26 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (v->u.w >= 0)
        jit_extr_us(u, _R26 - v->u.w);
@@ -454,7 +458,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (v->u.w >= 0)
        jit_movr(u, _R26 - v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (v->u.w >= 0)
        jit_movr(u, _R26 - v->u.w);
@@ -464,10 +468,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (v->u.w >= 0)
        jit_movr(_R26 - v->u.w, u);
     else
     if (v->u.w >= 0)
        jit_movr(_R26 - v->u.w, u);
     else
@@ -476,11 +480,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (v->u.w >= 0)
        jit_movi(_R26 - v->u.w, u);
     else {
     if (v->u.w >= 0)
        jit_movi(_R26 - v->u.w, u);
     else {
@@ -575,10 +579,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     _jitc->function->call.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     jit_link_prepare();
     _jitc->function->call.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -591,11 +595,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     _jitc->function->call.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     jit_link_prepare();
     _jitc->function->call.size -= sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -859,6 +863,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -979,6 +984,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1060,6 +1068,14 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
                break;
                case_rr(neg,);
                case_rr(com,);
+#define clor(r0, r1)   fallback_clo(r0, r1)
+#define clzr(r0, r1)   fallback_clz(r0, r1)
+#define ctor(r0, r1)   fallback_cto(r0, r1)
+#define ctzr(r0, r1)   fallback_ctz(r0, r1)
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
@@ -1339,7 +1355,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (word >= -32768 && word <= 32767)
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -1368,6 +1389,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1388,6 +1410,18 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo.
+                    * Note that for hppa use '-' instead of '+' as hppa
+                    * stack grows up */
+                   undo.func.self.aoff = _jitc->function->frame -
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1409,24 +1443,37 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_getarg_s:             case jit_code_getarg_us:
            case jit_code_getarg_i:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index 068bc07..98a10c3 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -1301,6 +1301,16 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define bitswap(r0, r1)                        _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#define clor(r0, r1)                   _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#define clzr(r0, r1)                   _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#define ctor(r0, r1)                   _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#define ctzr(r0, r1)                   _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #define negr(r0,r1)                    subr(r0,0,r1)
 #define comr(r0,r1)                    ANDCMI(r0,-1,r1)
 #define movr(r0,r1)                    _movr(_jit,r0,r1)
 #define negr(r0,r1)                    subr(r0,0,r1)
 #define comr(r0,r1)                    ANDCMI(r0,-1,r1)
 #define movr(r0,r1)                    _movr(_jit,r0,r1)
@@ -1500,7 +1510,7 @@ static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define jmpr(r0)                       _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define jmpi(i0)                       _jmpi(_jit,i0)
 #define jmpr(r0)                       _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define jmpi(i0)                       _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #define jmpi_p(i0)                     _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #define callr(r0)                      _callr(_jit,r0)
 #define jmpi_p(i0)                     _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #define callr(r0)                      _callr(_jit,r0)
@@ -2456,7 +2466,7 @@ _I9(jit_state_t *_jit, jit_word_t _p,
     TSTREG1(r3);
     TSTPRED(_p);
     TSTREG1(r1);
     TSTREG1(r3);
     TSTPRED(_p);
     TSTREG1(r1);
-    inst((7L<<37)|(1L<<34)|(1L<<34)|(1L<<33)|
+    inst((7L<<37)|(1L<<34)|(1L<<33)|
         (x2<<30)|(1L<<28)|(r3<<20)|(r1<<6)|_p, INST_I);
     SETREG(r1);
 }
         (x2<<30)|(1L<<28)|(r3<<20)|(r1<<6)|_p, INST_I);
     SETREG(r1);
 }
@@ -3465,6 +3475,94 @@ _nop(jit_state_t *_jit, jit_int32_t i0)
     assert(i0 == 0);
 }
 
     assert(i0 == 0);
 }
 
+/*
+ * Emit code that leaves in r0 the value of r1 with its bit order
+ * reversed.
+ *
+ * r1 is first copied to r0 and every later step works on r0 only.
+ * Groups of 1, 2, 4, 8 and 16 bits are exchanged with their neighbour,
+ * each pass using a mask loaded in t0:
+ *     v = ((v >> n) & mask) | ((v & mask) << n)
+ * The last pass exchanges the two 32-bit halves and needs no mask.
+ * The 16-bit mask constant and the final 32-bit shifts are written for
+ * a 64-bit word.
+ *
+ * Three scratch registers are taken with jit_get_reg() and released
+ * before returning.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    /* exchange adjacent single bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* exchange adjacent 2-bit groups */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* exchange adjacent nibbles */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* exchange adjacent bytes */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* exchange adjacent 16-bit groups */
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* exchange the two 32-bit halves; no mask is needed here */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * clz of r1 into r0.  Uses the CLZ emitter when jit_get_cpu() flagged
+ * it as available in jit_cpu.clz; otherwise defers to fallback_clz.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+       fallback_clz(r0, r1);
+       return;
+    }
+    CLZ(r0, r1);
+}
+
+/*
+ * clo of r1 into r0.  With jit_cpu.clz set this is done as the clz of
+ * the complement of r1 (comr followed by clzr on r0); otherwise
+ * fallback_clo is used.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+       fallback_clo(r0, r1);
+       return;
+    }
+    /* r0 = ~r1, then count on the complemented value in place */
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/*
+ * cto of r1 into r0.  With jit_cpu.clz set the bits of r1 are reversed
+ * into r0 and clor is applied to the result; otherwise fallback_cto
+ * is used.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+       fallback_cto(r0, r1);
+       return;
+    }
+    /* reverse the bit order, then count from the other end */
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+/*
+ * ctz of r1 into r0.  With jit_cpu.clz set the bits of r1 are reversed
+ * into r0 and clzr is applied to the result; otherwise fallback_ctz
+ * is used.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.clz) {
+       fallback_ctz(r0, r1);
+       return;
+    }
+    /* reverse the bit order, then count from the other end */
+    bitswap(r0, r1);
+    clzr(r0, r0);
+}
+
 static void
 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -5145,16 +5243,18 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     BR(BR_6);
 }
 
     BR(BR_6);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         d;
+    jit_word_t         d, w;
     sync();
     sync();
-    d = ((jit_word_t)i0 - _jit->pc.w) >> 4;
+    w = _jit->pc.w;
+    d = ((jit_word_t)i0 - w) >> 4;
     if (d >= -16777216 && d <= 16777215)
        BRI(d);
     else
        BRL(d);
     if (d >= -16777216 && d <= 16777215)
        BRI(d);
     else
        BRL(d);
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -5400,14 +5500,16 @@ _patch_at(jit_state_t *_jit, jit_code_t code,
            i1  = (ic >> 61) &           0x1L;
            i41 = (ic >> 22) & 0x1ffffffffffL;
            i20 =  ic        &       0xfffffL;
            i1  = (ic >> 61) &           0x1L;
            i41 = (ic >> 22) & 0x1ffffffffffL;
            i20 =  ic        &       0xfffffL;
-           assert((tm & ~1) == TM_M_L_X_ &&
+           if (!((tm & ~1) == TM_M_L_X_ &&
                   (s2 & 0xfL<<37) == (0xcL<<37) &&
                   (s2 & 0xfL<<37) == (0xcL<<37) &&
-                  s0 == nop_m);
+                 s0 == nop_m))
+               goto short_jump;
            s1 = i41;
            s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12);
            s2 |= (i1<<36)|(i20<<13);
            break;
        default:
            s1 = i41;
            s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12);
            s2 |= (i1<<36)|(i20<<13);
            break;
        default:
+       short_jump:
            /* Only B1 in slot 0 expected due to need to either
             * a stop to update predicates, or a sync before
             * unconditional short branch */
            /* Only B1 in slot 0 expected due to need to either
             * a stop to update predicates, or a sync before
             * unconditional short branch */
index 344977e..f0fb32c 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index e65da54..e1d973c 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 224
+#define JIT_INSTR_MAX 608
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    0, /* align */
+    48,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    16,        /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     16,        /* va_start */
     32,        /* va_arg */
     32,        /* va_arg_d */
     16,        /* va_start */
     32,        /* va_arg */
     32,        /* va_arg_d */
     16,        /* movi */
     16,        /* movnr */
     16,        /* movzr */
     16,        /* movi */
     16,        /* movnr */
     16,        /* movzr */
+    48,        /* casr */
+    64,        /* casi */
     16,        /* extr_c */
     16,        /* extr_uc */
     16,        /* extr_s */
     16,        /* extr_us */
     16,        /* extr_i */
     16,        /* extr_ui */
     16,        /* extr_c */
     16,        /* extr_uc */
     16,        /* extr_s */
     16,        /* extr_us */
     16,        /* extr_i */
     16,        /* extr_ui */
+    32,        /* bswapr_us */
+    32,        /* bswapr_ui */
+    16,        /* bswapr_ul */
     32,        /* htonr_us */
     32,        /* htonr_ui */
     16,        /* htonr_ul */
     32,        /* htonr_us */
     32,        /* htonr_ui */
     16,        /* htonr_ul */
     32,        /* callr */
     48,        /* calli */
     0, /* prepare */
     32,        /* callr */
     48,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     16,        /* movr_d_w */
     32,        /* movi_d_w */
     0, /* movi_d_ww */
     16,        /* movr_d_w */
     32,        /* movi_d_w */
-    32,        /* bswapr_us */
-    32,        /* bswapr_ui */
-    16,        /* bswapr_ul */
-    48,        /* casr */
-    64,        /* casi */
+    608,       /* clo */
+    544,       /* clz */
+    608,       /* cto */
+    544,       /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index 1c35fb1..2968278 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -58,6 +58,7 @@ extern void __clear_cache(void *, void *);
 /*
  * Initialization
  */
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     /* Always 0 */
     { 0,                "r0"  },
 jit_register_t         _rvs[] = {
     /* Always 0 */
     { 0,                "r0"  },
@@ -239,6 +240,11 @@ jit_register_t             _rvs[] = {
 void
 jit_get_cpu(void)
 {
 void
 jit_get_cpu(void)
 {
+    jit_word_t         clz = -1;
+    __asm__ volatile("tf.nz.unc p6,p7=32;(p6)mov %0=1;(p7)mov %0=0"
+                    : "=r" (clz));
+    assert(clz == 0 || clz == 1);
+    jit_cpu.clz = clz;
 }
 
 void
 }
 
 void
@@ -345,18 +351,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
+    jit_code_inc_synth_w(code, u);
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -410,9 +416,10 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    assert(u->code == jit_code_arg ||
-          u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_reg_p(u->u.w));
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
+       return (jit_arg_reg_p(u->u.w));
+    assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+    return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 }
 
 void
 }
 
 void
@@ -442,18 +449,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     }
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -508,7 +519,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _R32 + v->u.w);
@@ -520,7 +531,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _R32 + v->u.w);
@@ -532,7 +543,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _R32 + v->u.w);
@@ -544,7 +555,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _R32 + v->u.w);
@@ -556,7 +567,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _R32 + v->u.w);
@@ -568,7 +579,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _R32 + v->u.w);
@@ -580,7 +591,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _R32 + v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _R32 + v->u.w);
@@ -590,10 +601,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_R32 + v->u.w, u);
     else
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_R32 + v->u.w, u);
     else
@@ -602,11 +613,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_R32 + v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_R32 + v->u.w, u);
     else {
@@ -713,10 +724,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_OUT0 + _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_OUT0 + _jitc->function->call.argi, u);
@@ -730,11 +741,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_OUT0 + _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_OUT0 + _jitc->function->call.argi, u);
@@ -973,6 +984,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1122,6 +1134,10 @@ _emit_code(jit_state_t *_jit)
                if (node->u.w > 8)
                    nop(node->u.w - 8);
                break;
                if (node->u.w > 8)
                    nop(node->u.w - 8);
                break;
+           case jit_code_skip:
+               sync();
+               nop((node->u.w + 7) & ~7);
+               break;
            case jit_code_note:         case jit_code_name:
                sync();
                node->u.w = _jit->pc.w;
            case jit_code_note:         case jit_code_name:
                sync();
                node->u.w = _jit->pc.w;
@@ -1177,6 +1193,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
            case jit_code_casr:
                casr(rn(node->u.w), rn(node->v.w),
                     rn(node->w.q.l), rn(node->w.q.h));
            case jit_code_casr:
                casr(rn(node->u.w), rn(node->v.w),
                     rn(node->w.q.l), rn(node->w.q.h));
@@ -1504,7 +1524,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (word  >= -16777216 && word <= 16777215)
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -1533,6 +1558,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1571,6 +1597,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1599,14 +1635,21 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:                case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1614,10 +1657,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index 052d9ac..ab05852 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -31,7 +31,6 @@
 # define _RA_REGNO                     1
 # define _SP_REGNO                     3
 # define _FP_REGNO                     22
 # define _RA_REGNO                     1
 # define _SP_REGNO                     3
 # define _FP_REGNO                     22
-# define stack_framesize               160
 # define ldr(u, v)                     ldr_l(u, v)
 # define ldi(u, v)                     ldi_l(u, v)
 # define ldxi(u, v, w)                 ldxi_l(u, v, w)
 # define ldr(u, v)                     ldr_l(u, v)
 # define ldi(u, v)                     ldi_l(u, v)
 # define ldxi(u, v, w)                 ldxi_l(u, v, w)
@@ -335,6 +334,10 @@ static void _oj26(jit_state_t*, jit_int32_t,jit_int32_t);
 # define nop(i0)                       _nop(_jit, i0)
 # define comr(r0, r1)                  NOR(r0, r1, r1)
 # define negr(r0, r1)                  subr(r0, _ZERO_REGNO, r1)
 # define nop(i0)                       _nop(_jit, i0)
 # define comr(r0, r1)                  NOR(r0, r1, r1)
 # define negr(r0, r1)                  subr(r0, _ZERO_REGNO, r1)
+# define clor(r0, r1)                  CLO_D(r0, r1)
+# define clzr(r0, r1)                  CLZ_D(r0, r1)
+# define ctor(r0, r1)                  CTO_D(r0, r1)
+# define ctzr(r0, r1)                  CTZ_D(r0, r1)
 static void _nop(jit_state_t*,jit_int32_t);
 # define movr(r0, r1)                  _movr(_jit, r0, r1)
 static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
 static void _nop(jit_state_t*,jit_int32_t);
 # define movr(r0, r1)                  _movr(_jit, r0, r1)
 static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
@@ -580,7 +583,7 @@ static jit_word_t _bner(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 # define jmpr(r0)                      JIRL(_ZERO_REGNO, r0, 0)
 # define jmpi(i0)                      _jmpi(_jit, i0)
 static jit_word_t _bnei(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 # define jmpr(r0)                      JIRL(_ZERO_REGNO, r0, 0)
 # define jmpi(i0)                      _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*, jit_word_t);
+static jit_word_t _jmpi(jit_state_t*, jit_word_t);
 # define jmpi_p(i0)                    _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 # define boaddr(i0, r0, r1)            _boaddr(_jit, i0, r0, r1)
 # define jmpi_p(i0)                    _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 # define boaddr(i0, r0, r1)            _boaddr(_jit, i0, r0, r1)
@@ -625,7 +628,7 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 # define callr(r0)                     JIRL(_RA_REGNO, r0, 0)
 # define calli(i0)                     _calli(_jit, i0)
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 # define callr(r0)                     JIRL(_RA_REGNO, r0, 0)
 # define calli(i0)                     _calli(_jit, i0)
-static void _calli(jit_state_t*, jit_word_t);
+static jit_word_t _calli(jit_state_t*, jit_word_t);
 # define calli_p(i0)                   _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*, jit_word_t);
 # define prolog(i0)                    _prolog(_jit, i0)
 # define calli_p(i0)                   _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*, jit_word_t);
 # define prolog(i0)                    _prolog(_jit, i0)
@@ -2134,15 +2137,17 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     return (w);
 }
 
     return (w);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
-    w = (i0 - _jit->pc.w) >> 2;
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
     if (can_sign_extend_si26_p(i0))
     if (can_sign_extend_si26_p(i0))
-       B(w);
+       B(d);
     else
     else
-       (void)jmpi_p(i0);
+       w = jmpi_p(i0);
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2501,15 +2506,17 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     return (w);
 }
 
     return (w);
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
-    w = (i0 - _jit->pc.w) >> 2;
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
     if (can_sign_extend_si26_p(i0))
     if (can_sign_extend_si26_p(i0))
-       BL(w);
+       BL(d);
     else
     else
-       (void)calli_p(i0);
+       w = calli_p(i0);
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2527,9 +2534,10 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -2540,44 +2548,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
-    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
-    stxi(0, _SP_REGNO, _RA_REGNO);
-    stxi(8, _SP_REGNO, _FP_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S0))
-       stxi(16, _SP_REGNO, rn(_S0));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       stxi(24, _SP_REGNO, rn(_S1));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       stxi(32, _SP_REGNO, rn(_S2));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       stxi(40, _SP_REGNO, rn(_S3));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       stxi(48, _SP_REGNO, rn(_S4));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       stxi(56, _SP_REGNO, rn(_S5));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       stxi(64, _SP_REGNO, rn(_S6));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       stxi(72, _SP_REGNO, rn(_S7));
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       stxi(80, _SP_REGNO, rn(_S8));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       stxi_d(88, _SP_REGNO, rn(_FS0));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       stxi_d(96, _SP_REGNO, rn(_FS1));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       stxi_d(104, _SP_REGNO, rn(_FS2));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       stxi_d(112, _SP_REGNO, rn(_FS3));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       stxi_d(120, _SP_REGNO, rn(_FS4));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       stxi_d(128, _SP_REGNO, rn(_FS5));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       stxi_d(136, _SP_REGNO, rn(_FS6));
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       stxi_d(144, _SP_REGNO, rn(_FS7));
-    movr(_FP_REGNO, _SP_REGNO);
+
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+    if (_jitc->function->need_frame) {
+       stxi(0, _SP_REGNO, _RA_REGNO);
+       stxi(8, _SP_REGNO, _FP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _SP_REGNO, rn(iregs[reg]));
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
@@ -2588,7 +2593,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     }
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
     }
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
-           stxi(stack_framesize - ((8 - reg) * 8),
+           stxi(jit_framesize() - ((8 - reg) * 8),
                 _FP_REGNO, rn(JIT_RA0 - reg));
     }
 }
                 _FP_REGNO, rn(JIT_RA0 - reg));
     }
 }
@@ -2596,46 +2601,31 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->assume_frame)
        return;
-    movr(_SP_REGNO, _FP_REGNO);
-    ldxi(_RA_REGNO, _SP_REGNO, 0);
-    ldxi(_FP_REGNO, _SP_REGNO, 8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S0))
-       ldxi(rn(_S0), _SP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       ldxi(rn(_S1), _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       ldxi(rn(_S2), _SP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       ldxi(rn(_S3), _SP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       ldxi(rn(_S4), _SP_REGNO, 48);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       ldxi(rn(_S5), _SP_REGNO, 56);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       ldxi(rn(_S6), _SP_REGNO, 64);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       ldxi(rn(_S7), _SP_REGNO, 72);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       ldxi(rn(_S8), _SP_REGNO, 80);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       ldxi_d(rn(_FS0), _SP_REGNO, 88);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       ldxi_d(rn(_FS1), _SP_REGNO, 96);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       ldxi_d(rn(_FS2), _SP_REGNO, 104);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       ldxi_d(rn(_FS3), _SP_REGNO, 112);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       ldxi_d(rn(_FS4), _SP_REGNO, 120);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       ldxi_d(rn(_FS5), _SP_REGNO, 128);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       ldxi_d(rn(_FS6), _SP_REGNO, 136);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       ldxi_d(rn(_FS7), _SP_REGNO, 144);
-    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    if (_jitc->function->need_frame) {
+       movr(_SP_REGNO, _FP_REGNO);
+       ldxi(_RA_REGNO, _SP_REGNO, 0);
+       ldxi(_FP_REGNO, _SP_REGNO, 8);
+    }
+
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       addi(_SP_REGNO, _SP_REGNO, jit_framesize());
     JIRL(_ZERO_REGNO, _RA_REGNO, 0);
 }
 
     JIRL(_ZERO_REGNO, _RA_REGNO, 0);
 }
 
@@ -2645,9 +2635,9 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     assert(_jitc->function->self.call & jit_call_varargs);
     /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
     assert(_jitc->function->self.call & jit_call_varargs);
     /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
-       addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+       addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
     else
     else
-       addi(r0, _FP_REGNO, _jitc->function->self.size);
+       addi(r0, _FP_REGNO, jit_selfsize());
 }
 
 static void
 }
 
 static void
index 5874afd..2871de3 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index 2490cfa..4b95047 100644 (file)
@@ -5,6 +5,7 @@
     28,        /* align */
     0, /* save */
     0, /* load */
     28,        /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     16,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
     16,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    32,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
-    16,        /* ldi_c */
+    20,        /* ldi_c */
     4, /* ldr_uc */
     4, /* ldr_uc */
-    16,        /* ldi_uc */
+    20,        /* ldi_uc */
     4, /* ldr_s */
     4, /* ldr_s */
-    16,        /* ldi_s */
+    20,        /* ldi_s */
     4, /* ldr_us */
     4, /* ldr_us */
-    16,        /* ldi_us */
+    20,        /* ldi_us */
     4, /* ldr_i */
     4, /* ldr_i */
-    16,        /* ldi_i */
+    20,        /* ldi_i */
     4, /* ldr_ui */
     4, /* ldr_ui */
-    16,        /* ldi_ui */
+    20,        /* ldi_ui */
     4, /* ldr_l */
     4, /* ldr_l */
-    16,        /* ldi_l */
+    20,        /* ldi_l */
     4, /* ldxr_c */
     16,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_c */
     16,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_l */
     16,        /* ldxi_l */
     4, /* str_c */
     4, /* ldxr_l */
     16,        /* ldxi_l */
     4, /* str_c */
-    16,        /* sti_c */
+    20,        /* sti_c */
     4, /* str_s */
     4, /* str_s */
-    16,        /* sti_s */
+    20,        /* sti_s */
     4, /* str_i */
     4, /* str_i */
-    16,        /* sti_i */
+    20,        /* sti_i */
     4, /* str_l */
     4, /* str_l */
-    16,        /* sti_l */
+    20,        /* sti_l */
     4, /* stxr_c */
     16,        /* stxi_c */
     4, /* stxr_s */
     4, /* stxr_c */
     16,        /* stxi_c */
     4, /* stxr_s */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
     4, /* callr */
     20,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* movr_f */
     8, /* movi_f */
     4, /* ldr_f */
     4, /* movr_f */
     8, /* movi_f */
     4, /* ldr_f */
-    16,        /* ldi_f */
+    20,        /* ldi_f */
     4, /* ldxr_f */
     16,        /* ldxi_f */
     4, /* str_f */
     4, /* ldxr_f */
     16,        /* ldxi_f */
     4, /* str_f */
-    16,        /* sti_f */
+    20,        /* sti_f */
     4, /* stxr_f */
     16,        /* stxi_f */
     8, /* bltr_f */
     4, /* stxr_f */
     16,        /* stxi_f */
     8, /* bltr_f */
     4, /* movr_d */
     16,        /* movi_d */
     4, /* ldr_d */
     4, /* movr_d */
     16,        /* movi_d */
     4, /* ldr_d */
-    16,        /* ldi_d */
+    20,        /* ldi_d */
     4, /* ldxr_d */
     16,        /* ldxi_d */
     4, /* str_d */
     4, /* ldxr_d */
     16,        /* ldxi_d */
     4, /* str_d */
-    16,        /* sti_d */
+    20,        /* sti_d */
     4, /* stxr_d */
     16,        /* stxi_d */
     8, /* bltr_d */
     4, /* stxr_d */
     16,        /* stxi_d */
     8, /* bltr_d */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
-    8, /* bswapr_us */
-    8, /* bswapr_ui */
-    4, /* bswapr_ul */
-    32,        /* casr */
-    44,        /* casi */
+    4, /* clo */
+    4, /* clz */
+    4, /* cto */
+    4, /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index 78fac47..c9b5b8c 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2022  Free Software Foundation, Inc.
+ * Copyright (C) 2022-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* callee save                         + variadic arguments
+ * align16(ra+fp+s[0-8]+fs[0-7])       +       align16(a[0-7]) */
+#define stack_framesize                        (144 + 64)
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
@@ -28,6 +32,8 @@ typedef struct jit_pointer_t jit_va_list_t;
 /*
  * Prototypes
  */
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
@@ -107,6 +113,14 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8
+};
+
+static jit_int32_t fregs[] = {
+    _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7
+};
+
 /*
  * Implementation
  */
 /*
  * Implementation
  */
@@ -167,6 +181,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -215,20 +230,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -288,16 +301,17 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_f_reg_p(u->u.w));
+    return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 }
 
 void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
 }
 
 void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
@@ -321,19 +335,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -355,6 +373,7 @@ _jit_arg_f(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -378,6 +397,7 @@ _jit_arg_d(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -388,111 +408,129 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, _A0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+       node = jit_stxi(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -507,8 +545,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, _FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
        jit_movr_f(u, _FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -521,8 +561,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
        jit_movr_f(_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -534,18 +576,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(_FA0 - v->u.w, u);
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float32_t       f;
-           jit_int32_t         i;
-       } uu;
-       uu.f = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.i);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
     else {
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
+       node = jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -560,8 +598,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(u, _FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
        jit_movr_d(u, _FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -574,8 +614,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
        jit_movr_d(_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -587,28 +629,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_d(_FA0 - v->u.w, u);
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_d(_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float64_t       d;
-           jit_int64_t         w;
-       } uu;
-       uu.d = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
     else {
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
+       node = jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_A0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_A0 - _jitc->function->call.argi, u);
@@ -617,16 +655,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_A0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_A0 - _jitc->function->call.argi, u);
@@ -638,6 +677,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -660,6 +700,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     else {
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -686,6 +727,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -708,6 +750,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     else {
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -734,6 +777,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -761,6 +805,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -778,6 +823,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -877,6 +923,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1015,6 +1062,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1062,6 +1112,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1383,6 +1437,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1393,14 +1448,22 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (can_sign_extend_si26_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -1411,22 +1474,32 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (can_sign_extend_si26_p(word))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    calli(node->u.w);
                    calli(node->u.w);
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
                _jitc->again = 0;
                prolog(node);
                break;
@@ -1442,10 +1515,25 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* this will be recomputed but undo anyway to have it
+                    * better self documented.*/
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                if (node->link && (word = _jit->pc.w & 3))
                    goto restart_function;
                }
                if (node->link && (word = _jit->pc.w & 3))
@@ -1488,11 +1576,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:                case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1500,10 +1595,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_i:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1600,6 +1707,27 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void    /* Compute _jitc->framesize for the function being compiled. */
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    _jitc->framesize = 16;     /* ra+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)        /* one word per iregs[] entry set in the function regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    for (reg = 0; reg < jit_size(fregs); reg++)        /* one double per fregs[] entry set in the function regset */
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+
+    /* Space to store variadic arguments: (8 - vagp) slots of 8 bytes; presumably the argument registers not taken by named arguments -- TODO confirm */
+    if (_jitc->function->self.call & jit_call_varargs)
+       _jitc->framesize += (8 - _jitc->function->vagp) * 8;
+
+    /* Make sure functions called have a 16 byte aligned stack: round framesize up to the next multiple of 16 */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index e4e5deb..8e736da 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index f52d6dc..0b1b3b4 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -27,11 +27,11 @@ typedef union {
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } ft;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } rd;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } fs;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } ft;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } rd;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } fs;
+    struct {   jit_uint32_t _: 7;      jit_uint32_t b :  9; } i9;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } ic;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } fd;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } ic;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b :  5; } fd;
-    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 10; } tr;
-    struct {   jit_uint32_t _: 6;      jit_uint32_t b : 20; } br;
     struct {                           jit_uint32_t b :  6; } tc;
     struct {                           jit_uint32_t b :  6; } tc;
+    struct {                           jit_uint32_t b :  5; } cn;
     struct {                           jit_uint32_t b : 11; } cc;
     struct {                           jit_uint32_t b : 16; } is;
     struct {                           jit_uint32_t b : 26; } ii;
     struct {                           jit_uint32_t b : 11; } cc;
     struct {                           jit_uint32_t b : 16; } is;
     struct {                           jit_uint32_t b : 26; } ii;
@@ -43,22 +43,19 @@ typedef union {
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } ft;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } rd;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } fs;
     struct {   jit_uint32_t _:11;      jit_uint32_t b :  5; } ft;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } rd;
     struct {   jit_uint32_t _:16;      jit_uint32_t b :  5; } fs;
+    struct {   jit_uint32_t _:16;      jit_uint32_t b :  9; } i9;
     struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } ic;
     struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } fd;
     struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } ic;
     struct {   jit_uint32_t _:21;      jit_uint32_t b :  5; } fd;
-    struct {   jit_uint32_t _:21;      jit_uint32_t b : 10; } tr;
-    struct {   jit_uint32_t _:21;      jit_uint32_t b : 20; } br;
     struct {   jit_uint32_t _:26;      jit_uint32_t b :  6; } tc;
     struct {   jit_uint32_t _:26;      jit_uint32_t b :  6; } tc;
+    struct {   jit_uint32_t _:27;      jit_uint32_t b :  5; } cn;
     struct {   jit_uint32_t _:21;      jit_uint32_t b : 11; } cc;
     struct {   jit_uint32_t _:16;      jit_uint32_t b : 16; } is;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b : 26; } ii;
 #endif
     int                                        op;
 } jit_instr_t;
     struct {   jit_uint32_t _:21;      jit_uint32_t b : 11; } cc;
     struct {   jit_uint32_t _:16;      jit_uint32_t b : 16; } is;
     struct {   jit_uint32_t _: 6;      jit_uint32_t b : 26; } ii;
 #endif
     int                                        op;
 } jit_instr_t;
-#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
-#  define jit_mips2_p()                        1
-#else
-#  define jit_mips2_p()                        0
-#endif
+#define jit_mips2_p()                  (jit_cpu.release >= 2)
+#define jit_mips6_p()                  (jit_cpu.release >= 6)
 #  define _ZERO_REGNO                  0
 #  define _T0_REGNO                    0x08
 #  define _T1_REGNO                    0x09
 #  define _ZERO_REGNO                  0
 #  define _T0_REGNO                    0x08
 #  define _T1_REGNO                    0x09
@@ -90,24 +87,20 @@ typedef union {
 #  define _F28_REGNO                   28
 #  define _F30_REGNO                   30
 #  if __WORDSIZE == 32
 #  define _F28_REGNO                   28
 #  define _F30_REGNO                   30
 #  if __WORDSIZE == 32
-#    if NEW_ABI
-#      define stack_framesize          144
-#    else
-#      define stack_framesize          112
-#    endif
 #    define ldr(u,v)                   ldr_i(u,v)
 #    define ldi(u,v)                   ldi_i(u,v)
 #    define ldxi(u,v,w)                        ldxi_i(u,v,w)
 #    define sti(u,v)                   sti_i(u,v)
 #    define stxi(u,v,w)                        stxi_i(u,v,w)
 #  else
 #    define ldr(u,v)                   ldr_i(u,v)
 #    define ldi(u,v)                   ldi_i(u,v)
 #    define ldxi(u,v,w)                        ldxi_i(u,v,w)
 #    define sti(u,v)                   sti_i(u,v)
 #    define stxi(u,v,w)                        stxi_i(u,v,w)
 #  else
-#    define stack_framesize            144
 #    define ldr(u,v)                   ldr_l(u,v)
 #    define ldi(u,v)                   ldi_l(u,v)
 #    define ldxi(u,v,w)                        ldxi_l(u,v,w)
 #    define sti(u,v)                   sti_l(u,v)
 #    define stxi(u,v,w)                        stxi_l(u,v,w)
 #  endif
 #    define ldr(u,v)                   ldr_l(u,v)
 #    define ldi(u,v)                   ldi_l(u,v)
 #    define ldxi(u,v,w)                        ldxi_l(u,v,w)
 #    define sti(u,v)                   sti_l(u,v)
 #    define stxi(u,v,w)                        stxi_l(u,v,w)
 #  endif
+/* can_relative_jump_p(im) => can_sign_extend_short_p(im << 2); the bounds are 4 * -32678 and 4 * 32767. NOTE(review): -32678 looks like a typo for -32768 (which would give -131072); the current range is merely narrower -- confirm */
+#  define can_relative_jump_p(im)      ((im) >= -130712 && (im) <= 131068)
 #  define can_sign_extend_short_p(im)  ((im) >= -32678 && (im) <= 32767)
 #  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
 #  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0)
 #  define can_sign_extend_short_p(im)  ((im) >= -32678 && (im) <= 32767)
 #  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
 #  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((im) + 1) <= 1) : 0)
@@ -195,6 +188,8 @@ typedef union {
 #  define MIPS_CT                      0x06
 #  define MIPS_MTH                     0x07
 #  define MIPS_BC                      0x08
 #  define MIPS_CT                      0x06
 #  define MIPS_MTH                     0x07
 #  define MIPS_BC                      0x08
+#  define MIPS_BC1EQZ                  0x09    /* release 6 */
+#  define MIPS_BC1NEZ                  0x0d    /* release 6 */
 #  define MIPS_WRPGPR                  0x0e
 #  define MIPS_BGZAL                   0x11
 #  define MIPS_MFMC0                   0x11
 #  define MIPS_WRPGPR                  0x0e
 #  define MIPS_BGZAL                   0x11
 #  define MIPS_MFMC0                   0x11
@@ -303,17 +298,32 @@ typedef union {
 #  define MIPS_DSRA32                  0x3f
 #  define MIPS_SDBPP                   0x3f
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  define MIPS_DSRA32                  0x3f
 #  define MIPS_SDBPP                   0x3f
 #  define ii(i)                                *_jit->pc.ui++ = i
+#  define instr(op)                    _instr(_jit, op)
+static void _instr(jit_state_t*, jit_int32_t);
+#  define flush()                      _flush(_jit)
+static void _flush(jit_state_t*);
+#  define pending()                    _pending(_jit)
+static jit_int32_t _pending(jit_state_t*);
+#  define delay(op)                    _delay(_jit,op)
+static void _delay(jit_state_t*,jit_int32_t);
+#  define jit_get_reg_for_delay_slot(mask, r0,r1)                      \
+       _jit_get_reg_for_delay_slot(_jit,mask,r0,r1)
+static jit_int32_t _jit_get_reg_for_delay_slot(jit_state_t*,jit_int32_t,
+                                              jit_int32_t, jit_int32_t);
+#  define hrrrit(hc,rs,rt,rd,im,tc)    _hrrrit(_jit,hc,rs,rt,rd,im,tc)
 static void
 _hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
        jit_int32_t,jit_int32_t);
 static void
 _hrrrit(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,
        jit_int32_t,jit_int32_t);
-#  define hrrrit(hc,rs,rt,rd,im,tc)    _hrrrit(_jit,hc,rs,rt,rd,im,tc)
 #  define hrrr_t(hc,rs,rt,rd,tc)       hrrrit(hc,rs,rt,rd,0,tc)
 #  define rrr_t(rs,rt,rd,tc)           hrrr_t(0,rs,rt,rd,tc)
 #  define hrri(hc,rs,rt,im)            _hrri(_jit,hc,rs,rt,im)
 static void _hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define hrrr_t(hc,rs,rt,rd,tc)       hrrrit(hc,rs,rt,rd,0,tc)
 #  define rrr_t(rs,rt,rd,tc)           hrrr_t(0,rs,rt,rd,tc)
 #  define hrri(hc,rs,rt,im)            _hrri(_jit,hc,rs,rt,im)
 static void _hrri(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define hrri9(hc,rs,rt,i9,tc)                _hrri9(_jit,hc,rs,rt,i9,tc)
+static void _hrri9(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t);
 #  define hi(hc,im)                    _hi(_jit,hc,im)
 static void _hi(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define hi(hc,im)                    _hi(_jit,hc,im)
 static void _hi(jit_state_t*,jit_int32_t,jit_int32_t);
-#  define NOP(i0)                      ii(0)
+#  define NOP(i0)                      instr(0)
 #  define nop(i0)                      _nop(_jit,i0)
 static void _nop(jit_state_t*,jit_int32_t);
 #  define h_ri(hc,rt,im)               _hrri(_jit,hc,0,rt,im)
 #  define nop(i0)                      _nop(_jit,i0)
 static void _nop(jit_state_t*,jit_int32_t);
 #  define h_ri(hc,rt,im)               _hrri(_jit,hc,0,rt,im)
@@ -327,13 +337,29 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define DSUBU(rd,rs,rt)              rrr_t(rs,rt,rd,MIPS_DSUBU)
 #  define MUL(rd,rs,rt)                        hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL)
 #  define MULT(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
 #  define DSUBU(rd,rs,rt)              rrr_t(rs,rt,rd,MIPS_DSUBU)
 #  define MUL(rd,rs,rt)                        hrrr_t(MIPS_SPECIAL2,rs,rt,rd,MIPS_MUL)
 #  define MULT(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULT)
+#  define MUL_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 24)
+#  define MUH_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 24)
 #  define MULTU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
 #  define MULTU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_MULTU)
+#  define MULU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 25)
+#  define MUHU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 25)
 #  define DMULT(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
 #  define DMULT(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULT)
+#  define DMUL_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 28)
+#  define DMUH_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 28)
 #  define DMULTU(rs,rt)                        rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU)
 #  define DMULTU(rs,rt)                        rrr_t(rs,rt,_ZERO_REGNO,MIPS_DMULTU)
+#  define DMULU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 29)
+#  define DMUHU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 29)
 #  define DIV(rs,rt)                   rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV)
 #  define DIV(rs,rt)                   rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIV)
+#  define DIV_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 26)
+#  define MOD_R6(rd,rs,rt)             hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 26)
 #  define DIVU(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU)
 #  define DIVU(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DIVU)
+#  define DIVU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 27)
+#  define MODU_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 27)
 #  define DDIV(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV)
 #  define DDIV(rs,rt)                  rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIV)
+#  define DDIV_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 30)
+#  define DMOD_R6(rd,rs,rt)            hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 30)
 #  define DDIVU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU)
 #  define DDIVU(rs,rt)                 rrr_t(rs,rt,_ZERO_REGNO,MIPS_DDIVU)
+#  define DDIVU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 2, 31)
+#  define DMODU_R6(rd,rs,rt)           hrrrit(MIPS_SPECIAL, rs, rt, rd, 3, 31)
 #  define SLLV(rd,rt,rs)               rrr_t(rs,rt,rd,MIPS_SLLV)
 #  define SLL(rd,rt,sa)                        rrit(rt,rd,sa,MIPS_SLL)
 #  define DSLLV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSLLV)
 #  define SLLV(rd,rt,rs)               rrr_t(rs,rt,rd,MIPS_SLLV)
 #  define SLL(rd,rt,sa)                        rrit(rt,rd,sa,MIPS_SLL)
 #  define DSLLV(rd,rt,rs)              rrr_t(rs,rt,rd,MIPS_DSLLV)
@@ -368,6 +394,7 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define ANDI(rt,rs,im)               hrri(MIPS_ANDI,rs,rt,im)
 #  define OR(rd,rs,rt)                 rrr_t(rs,rt,rd,MIPS_OR)
 #  define ORI(rt,rs,im)                        hrri(MIPS_ORI,rs,rt,im)
 #  define ANDI(rt,rs,im)               hrri(MIPS_ANDI,rs,rt,im)
 #  define OR(rd,rs,rt)                 rrr_t(rs,rt,rd,MIPS_OR)
 #  define ORI(rt,rs,im)                        hrri(MIPS_ORI,rs,rt,im)
+#  define NOR(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_NOR)
 #  define XOR(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_XOR)
 #  define XORI(rt,rs,im)               hrri(MIPS_XORI,rs,rt,im)
 #  define LB(rt,of,rb)                 hrri(MIPS_LB,rb,rt,of)
 #  define XOR(rd,rs,rt)                        rrr_t(rs,rt,rd,MIPS_XOR)
 #  define XORI(rt,rs,im)               hrri(MIPS_XORI,rs,rt,im)
 #  define LB(rt,of,rb)                 hrri(MIPS_LB,rb,rt,of)
@@ -378,13 +405,17 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define LWU(rt,of,rb)                        hrri(MIPS_LWU,rb,rt,of)
 #  define LD(rt,of,rb)                 hrri(MIPS_LD,rb,rt,of)
 #  define LL(rt,of,rb)                 hrri(MIPS_LL,rb,rt,of)
 #  define LWU(rt,of,rb)                        hrri(MIPS_LWU,rb,rt,of)
 #  define LD(rt,of,rb)                 hrri(MIPS_LD,rb,rt,of)
 #  define LL(rt,of,rb)                 hrri(MIPS_LL,rb,rt,of)
+#  define LL_R6(rt,of,rb)              hrri9(MIPS_SPECIAL3,rb,rt,of,54)
 #  define LLD(rt,of,rb)                        hrri(MIPS_LLD,rb,rt,of)
 #  define LLD(rt,of,rb)                        hrri(MIPS_LLD,rb,rt,of)
+#  define LLD_R6(rt,of,rb)             hrri9(MIPS_SPECIAL3,rb,rt,of,55)
 #  define SB(rt,of,rb)                 hrri(MIPS_SB,rb,rt,of)
 #  define SH(rt,of,rb)                 hrri(MIPS_SH,rb,rt,of)
 #  define SW(rt,of,rb)                 hrri(MIPS_SW,rb,rt,of)
 #  define SD(rt,of,rb)                 hrri(MIPS_SD,rb,rt,of)
 #  define SC(rt,of,rb)                 hrri(MIPS_SC,rb,rt,of)
 #  define SB(rt,of,rb)                 hrri(MIPS_SB,rb,rt,of)
 #  define SH(rt,of,rb)                 hrri(MIPS_SH,rb,rt,of)
 #  define SW(rt,of,rb)                 hrri(MIPS_SW,rb,rt,of)
 #  define SD(rt,of,rb)                 hrri(MIPS_SD,rb,rt,of)
 #  define SC(rt,of,rb)                 hrri(MIPS_SC,rb,rt,of)
+#  define SC_R6(rt,of,rb)              hrri9(MIPS_SPECIAL3,rb,rt,of,38)
 #  define SCD(rt,of,rb)                        hrri(MIPS_SCD,rb,rt,of)
 #  define SCD(rt,of,rb)                        hrri(MIPS_SCD,rb,rt,of)
+#  define SCD_R6(rt,of,rb)             hrri9(MIPS_SPECIAL3,rb,rt,of,39)
 #  define WSBH(rd,rt)                  hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
 #  define SEB(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
 #  define SEH(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
 #  define WSBH(rd,rt)                  hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_WSBH,MIPS_BSHFL)
 #  define SEB(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEB,MIPS_BSHFL)
 #  define SEH(rd,rt)                   hrrrit(MIPS_SPECIAL3,0,rt,rd,MIPS_SEH,MIPS_BSHFL)
@@ -398,34 +429,73 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define BGEZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im)
 #  define BGTZ(rs,im)                  hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im)
 #  define BNE(rs,rt,im)                        hrri(MIPS_BNE,rs,rt,im)
 #  define BGEZ(rs,im)                  hrri(MIPS_REGIMM,rs,MIPS_BGEZ,im)
 #  define BGTZ(rs,im)                  hrri(MIPS_BGTZ,rs,_ZERO_REGNO,im)
 #  define BNE(rs,rt,im)                        hrri(MIPS_BNE,rs,rt,im)
+#  define BGEZAL(rs,im)                        hrri(MIPS_REGIMM,rs,MIPS_BGEZAL,im)
 #  define JALR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR)
 #  define JALR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,_RA_REGNO,0,MIPS_JALR)
-#  if 1 /* supports MIPS32 R6 */
-#   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
-#  else /* does not support MIPS32 R6 */
-#   define JR(r0)                      hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
+#  if 1                /* This should work for mips r6 or older */
+#    define JR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JALR)
+#  else                /* This should generate an illegal instruction in mips r6 */
+#    define JR(r0)                     hrrrit(MIPS_SPECIAL,r0,0,0,0,MIPS_JR)
 #  endif
 #  endif
+#  define CLO_R6(rd,rs)                        hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x11)
+#  define DCLO_R6(rd,rs)               hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x13)
+#  define CLZ_R6(rd,rs)                        hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x10)
+#  define DCLZ_R6(rd,rs)               hrrrit(MIPS_SPECIAL,rs,0,rd,1,0x12)
+#  define BITSWAP(rd,rt)               hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x20)
+#  define DBITSWAP(rd,rt)              hrrrit(MIPS_SPECIAL3,0,rt,rd,0,0x24)
+#  define CLO(rd,rs)                   hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLO)
+#  define DCLO(rd,rs)                  hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLO)
+#  define CLZ(rd,rs)                   hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_CLZ)
+#  define DCLZ(rd,rs)                  hrrrit(MIPS_SPECIAL2,rs,rd,rd,0,MIPS_DCLZ)
 #  define J(i0)                                hi(MIPS_J,i0)
 #  define JAL(i0)                      hi(MIPS_JAL,i0)
 #  define MOVN(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVN)
 #  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
 #  define J(i0)                                hi(MIPS_J,i0)
 #  define JAL(i0)                      hi(MIPS_JAL,i0)
 #  define MOVN(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVN)
 #  define MOVZ(rd,rs,rt)               hrrrit(0,rs,rt,rd,0,MIPS_MOVZ)
+#  define SELEQZ(rd,rs,rt)             hrrrit(0,rs,rt,rd,0,53)
+#  define SELNEZ(rd,rs,rt)             hrrrit(0,rs,rt,rd,0,55)
 #  define comr(r0,r1)                  xori(r0,r1,-1)
 #  define negr(r0,r1)                  subr(r0,_ZERO_REGNO,r1)
 #  define comr(r0,r1)                  xori(r0,r1,-1)
 #  define negr(r0,r1)                  subr(r0,_ZERO_REGNO,r1)
+#  define bitswap(r0,r1)               _bitswap(_jit, r0, r1)  /* expands to a plain call expression; the caller supplies the ';' */
+static void _bitswap(jit_state_t*,jit_int32_t,jit_int32_t);   /* definition is outside this chunk */
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  if __WORDSIZE == 32
 #    define addr(rd,rs,rt)             ADDU(rd,rs,rt)
 #    define addiu(r0,r1,i0)            ADDIU(r0,r1,i0)
 #    define subr(rd,rs,rt)             SUBU(rd,rs,rt)
 #    define mult(rs,rt)                        MULT(rs,rt)
 #  if __WORDSIZE == 32
 #    define addr(rd,rs,rt)             ADDU(rd,rs,rt)
 #    define addiu(r0,r1,i0)            ADDIU(r0,r1,i0)
 #    define subr(rd,rs,rt)             SUBU(rd,rs,rt)
 #    define mult(rs,rt)                        MULT(rs,rt)
+#    define mul_r6(rd,rs,rt)           MUL_R6(rd,rs,rt)
+#    define muh_r6(rd,rs,rt)           MUH_R6(rd,rs,rt)
 #    define multu(rs,rt)               MULTU(rs,rt)
 #    define multu(rs,rt)               MULTU(rs,rt)
+#    define mulu_r6(rd,rs,rt)          MULU_R6(rd,rs,rt)
+#    define muhu_r6(rd,rs,rt)          MUHU_R6(rd,rs,rt)
 #    define div(rs,rt)                 DIV(rs,rt)
 #    define divu(rs,rt)                        DIVU(rs,rt)
 #    define div(rs,rt)                 DIV(rs,rt)
 #    define divu(rs,rt)                        DIVU(rs,rt)
+#    define div_r6(rd,rs,rt)           DIV_R6(rd,rs,rt)
+#    define divu_r6(rd,rs,rt)          DIVU_R6(rd,rs,rt)
+#    define mod_r6(rd,rs,rt)           MOD_R6(rd,rs,rt)
+#    define modu_r6(rd,rs,rt)          MODU_R6(rd,rs,rt)
 #  else
 #    define addr(rd,rs,rt)             DADDU(rd,rs,rt)
 #    define addiu(r0,r1,i0)            DADDIU(r0,r1,i0)
 #    define subr(rd,rs,rt)             DSUBU(rd,rs,rt)
 #    define mult(rs,rt)                        DMULT(rs,rt)
 #  else
 #    define addr(rd,rs,rt)             DADDU(rd,rs,rt)
 #    define addiu(r0,r1,i0)            DADDIU(r0,r1,i0)
 #    define subr(rd,rs,rt)             DSUBU(rd,rs,rt)
 #    define mult(rs,rt)                        DMULT(rs,rt)
+#    define mul_r6(rd,rs,rt)           DMUL_R6(rd,rs,rt)
+#    define muh_r6(rd,rs,rt)           DMUH_R6(rd,rs,rt)
 #    define multu(rs,rt)               DMULTU(rs,rt)
 #    define multu(rs,rt)               DMULTU(rs,rt)
+#    define mulu_r6(rd,rs,rt)          DMULU_R6(rd,rs,rt)
+#    define muhu_r6(rd,rs,rt)          DMUHU_R6(rd,rs,rt)
 #    define div(rs,rt)                 DDIV(rs,rt)
 #    define divu(rs,rt)                        DDIVU(rs,rt)
 #    define div(rs,rt)                 DDIV(rs,rt)
 #    define divu(rs,rt)                        DDIVU(rs,rt)
+#    define div_r6(rd,rs,rt)           DDIV_R6(rd,rs,rt)
+#    define divu_r6(rd,rs,rt)          DDIVU_R6(rd,rs,rt)
+#    define mod_r6(rd,rs,rt)           DMOD_R6(rd,rs,rt)
+#    define modu_r6(rd,rs,rt)          DMODU_R6(rd,rs,rt)
 #  endif
 #  define extr(rd,rt,lsb,nb)   _extr(_jit,rd,rt,lsb,nb)
 static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  endif
 #  define extr(rd,rt,lsb,nb)   _extr(_jit,rd,rt,lsb,nb)
 static void _extr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
@@ -526,8 +596,10 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-#  define movnr(r0,r1,r2)              MOVN(r0, r1, r2)
-#  define movzr(r0,r1,r2)              MOVZ(r0, r1, r2)
+#  define movnr(r0, r1, r2)            _movnr(_jit, r0, r1, r2)
+static void _movnr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define movzr(r0, r1, r2)            _movzr(_jit, r0, r1, r2)
+static void _movzr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
                  jit_int32_t,jit_int32_t,jit_word_t);
 #  define casx(r0, r1, r2, r3, i0)     _casx(_jit, r0, r1, r2, r3, i0)
 static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
                  jit_int32_t,jit_int32_t,jit_word_t);
@@ -672,50 +744,44 @@ static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
-static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
-static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
-static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
-static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
-static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
-static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
-static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
-static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1)                 bger(i0,r1,r0)
+#define bltr_u(i0,r0,r1)               bger_u(i0,r1,r0)
+#define blti(i0,r0,i1)                 _bgei(_jit,i0,r0,i1,0,1)
+#define blti_u(i0,r0,i1)               _bgei(_jit,i0,r0,i1,1,1)
+#define bler(i0,r0,r1)                 _bgtr(_jit,i0,r1,r0,0,1)
+#define bler_u(i0,r0,r1)               _bgtr(_jit,i0,r1,r0,1,1)
+#define blei(i0,r0,i1)                 _bgti(_jit,i0,r0,i1,0,1)
+#define blei_u(i0,r0,i1)               _bgti(_jit,i0,r0,i1,1,1)
 #define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
 static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
 static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
 static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
 static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
-static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
-static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
-static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
-static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
-static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
-static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
-static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
-static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1,0)
+#define bger_u(i0,r0,r1)               _bger(_jit,i0,r0,r1,1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                       jit_bool_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1,0,0)
+#define bgei_u(i0,r0,i1)               _bgei(_jit,i0,r0,i1,1,0)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                       jit_bool_t,jit_bool_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1,0,0)
+#define bgtr_u(i0,r0,r1)               _bgtr(_jit,i0,r0,r1,1,0)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
+                       jit_bool_t,jit_bool_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1,0,0)
+#define bgti_u(i0,r0,i1)               _bgti(_jit,i0,r0,i1,1,0)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                       jit_bool_t,jit_bool_t);
 #define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
 static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
 static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
 static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
 static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
-#  define jmpi(i0)                     _jmpi(_jit,i0)
-static jit_word_t _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi(i0,patch)               _jmpi(_jit,i0,patch)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define boaddr(i0,r0,r1)             _boaddr(_jit,i0,r0,r1)
 static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define boaddi(i0,r0,i1)             _boaddi(_jit,i0,r0,i1)
 #  define boaddr(i0,r0,r1)             _boaddr(_jit,i0,r0,r1)
 static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define boaddi(i0,r0,i1)             _boaddi(_jit,i0,r0,i1)
@@ -758,8 +824,8 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define callr(r0)                    _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+#  define calli(i0,i1)                 _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_bool_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit,node)
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit,node)
@@ -774,9 +840,584 @@ static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
 static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
 #define patch_at(jump,label)           _patch_at(_jit,jump,label)
 static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
 static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
 #define patch_at(jump,label)           _patch_at(_jit,jump,label)
 static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+/* definitions used by jit_get_reg_for_delay_slot() */
+#include "jit_mips-fpu.c"
 #endif
 
 #if CODE
 #endif
 
 #if CODE
+/*
+ * Queue op as the single pending instruction.  If another instruction
+ * was already queued it is written to the code buffer first; otherwise
+ * the queue is simply marked as occupied.
+ */
+static void
+_instr(jit_state_t *_jit, jit_int32_t op)
+{
+    if (!_jitc->inst.pend)
+       _jitc->inst.pend = 1;
+    else
+       ii(_jitc->inst.op);
+    /* op always becomes the new pending instruction */
+    _jitc->inst.op = op;
+}
+
+/*
+ * Write the pending instruction, if any, to the code buffer and mark
+ * the queue as empty.  Does nothing when no instruction is pending.
+ */
+static void
+_flush(jit_state_t *_jit)
+{
+    if (!_jitc->inst.pend)
+       return;
+    ii(_jitc->inst.op);
+    _jitc->inst.pend = 0;
+}
+
+/*
+ * Take the pending instruction out of the queue without emitting it.
+ * Returns the queued instruction word, or 0 when nothing was pending.
+ */
+static jit_int32_t
+_pending(jit_state_t *_jit)
+{
+    jit_int32_t                queued = 0;
+    if (_jitc->inst.pend) {
+       queued = _jitc->inst.op;
+       _jitc->inst.pend = 0;
+    }
+    return (queued);
+}
+
+/*
+ * Write the pending instruction immediately followed by op, leaving the
+ * queue empty.  An instruction must be pending (asserted).
+ */
+static void
+_delay(jit_state_t *_jit, jit_int32_t op)
+{
+    jit_int32_t                queued;
+    assert(_jitc->inst.pend);
+    queued = _jitc->inst.op;
+    _jitc->inst.pend = 0;
+    ii(queued);
+    /* op lands in the word right after the formerly pending one */
+    ii(op);
+}
+
+/*
+ * Allocate a temporary register that does not clash with the
+ * instruction that may be moved to a delay slot.
+ *
+ * The instruction inspected is the pending one if any, else the last
+ * word written to the code buffer, else a nop (0).  It is decoded and
+ * up to three register numbers it references are stored in regs[]
+ * (-1 means "no register recorded").
+ *
+ *   mask   register class flags passed to jit_get_reg(); jit_class_gpr
+ *         selects the integer view of the decoded registers, otherwise
+ *         the float view is recorded.  jit_class_chk allows failure.
+ *   reg0   first register checked against the decoded registers
+ *   reg1   second register checked against the decoded registers
+ *         (for gpr requests a value of 0 is ignored for both)
+ *
+ * If reg0 or reg1 is referenced by the pending instruction, the pending
+ * instruction is flushed.  While an instruction is still pending, the
+ * returned register is one not present in regs[].  If no register can
+ * be obtained the pending instruction is flushed and the allocation is
+ * retried, unless jit_class_chk is set, in which case JIT_NOREG may be
+ * returned.
+ */
+static jit_int32_t
+_jit_get_reg_for_delay_slot(jit_state_t *_jit, jit_int32_t mask,
+                           jit_int32_t reg0, jit_int32_t reg1)
+{
+    jit_instr_t                i;
+    jit_int32_t                reg, r0, r1, r2, regs[3];
+    /* If there is a pending instruction, inspect it */
+    if (_jitc->inst.pend)
+       i.op = _jitc->inst.op;
+    /* Else if at least one instruction emitted, check it */
+    else if (_jit->pc.uc > _jit->code.ptr)
+       i.op = _jit->pc.ui[-1];
+    /* Else, a nop */
+    else
+       i.op = 0;
+    regs[0] = regs[1] = regs[2] = -1;
+    switch (i.hc.b) {
+       case MIPS_SPECIAL:              /* 00 */
+           switch (i.tc.b) {
+               case MIPS_SLLV:         /* 04 */
+               case MIPS_SRLV:         /* 06 */
+               case MIPS_SRAV:         /* 07 */
+               case MIPS_DSLLV:        /* 14 */
+               case MIPS_DSRLV:        /* 16 */
+               case MIPS_DSRAV:        /* 17 */
+               case MIPS_ADDU:         /* 21 */
+               case MIPS_SUBU:         /* 23 */
+               case MIPS_AND:          /* 24 */
+               case MIPS_OR:           /* 25 */
+               case MIPS_XOR:          /* 26 */
+               case MIPS_NOR:          /* 27 */
+               case MIPS_SLT:          /* 2a */
+               case MIPS_SLTU:         /* 2b */
+               case MIPS_DADDU:        /* 2d */
+               case MIPS_DSUBU:        /* 2f */
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+                   /* MUL MUH */
+               case MIPS_MULT:         /* 18 */
+                   /* MULU MUHU */
+               case MIPS_MULTU:        /* 19 */
+                   /* DIV MOD */
+               case MIPS_DIV:          /* 1a */
+                   /* DIVU MODU */
+               case MIPS_DIVU:         /* 1b */
+                   /* DMUL DMUH */
+               case MIPS_DMULT:        /* 1c */
+                   /* DMULU DMUHU */
+               case MIPS_DMULTU:       /* 1d */
+                   /* DDIV DMOD */
+               case MIPS_DDIV:         /* 1e */
+                   /* DDIVU DMODU */
+               case MIPS_DDIVU:        /* 1f */
+                   if (jit_mips6_p()) {
+                       assert(i.ic.b == 2 || i.ic.b == 3);
+                       if (mask & jit_class_gpr) {
+                           regs[0] = i.rs.b;
+                           regs[1] = i.rt.b;
+                           regs[2] = i.rd.b;
+                       }
+                   }
+                   else {
+                       assert(i.rd.b == 0);
+                       if (mask & jit_class_gpr) {
+                           regs[0] = i.rs.b;
+                           regs[1] = i.rt.b;
+                           regs[2] = 0;
+                       }
+                   }
+                   break;
+                   /* CLZ */
+               case MIPS_MFHI:         /* 10 */
+                   /* CLO */
+               case MIPS_MTHI:         /* 11 */
+                   /* DCLZ */
+               case MIPS_MFLO:         /* 12 */
+                   /* DCLO */
+               case MIPS_MTLO:         /* 13 */
+                   if (mask & jit_class_gpr) {
+                       if (jit_mips6_p()) {
+                           assert(i.ic.b == 1);
+                           /* R6 CLZ/CLO/DCLZ/DCLO read rs and write
+                            * rd, so the source must be tracked too */
+                           regs[1] = i.rs.b;
+                       }
+                       else {
+                           assert(!i.rs.b && !i.rt.b);
+                           regs[1] = 0;
+                       }
+                       regs[0] = i.rd.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               case MIPS_JR:           /* 08 */
+                   assert(!jit_mips6_p());
+                   /* fallthrough */
+               case MIPS_JALR:         /* 09 */
+                   /* check for proper/known encoding */
+                   assert(!i.ic.b);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               case MIPS_SLL:          /* 00 */
+               case MIPS_SRL:          /* 02 */
+               case MIPS_SRA:          /* 03 */
+               case MIPS_DSLL:         /* 38 */
+               case MIPS_DSRL:         /* 3a */
+               case MIPS_DSRA:         /* 3b */
+               case MIPS_DSLL32:       /* 3c */
+               case MIPS_DSRA32:       /* 3f */
+               case MIPS_DSRL32:       /* 3e */
+                   /* shift (or rotate if i.rs.b == 1) */
+                   assert(i.rs.b == 0 || i.rs.b == 1);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rt.b;
+                       regs[1] = i.rd.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               case MIPS_SYNC:         /* 0f */
+                   assert(i.rs.b == 0 && i.rt.b == 0 && i.rd.b == 0);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   break;
+               case MIPS_MOVZ:         /* 0a */
+               case MIPS_MOVN:         /* 0b */
+                   assert(!jit_mips6_p() && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               /* SELEQZ */
+               case 53:                /* 35 */
+               /* SELNEZ */
+               case 55:                /* 37 */
+                   assert(jit_mips6_p() && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+       case MIPS_REGIMM:               /* 01 */
+           switch (i.rt.b) {
+               case MIPS_BLTZ:         /* 00 */
+               case MIPS_BGEZ:         /* 01 */
+               case MIPS_BGEZAL:       /* 11 */
+                   break;
+               default:
+                   abort();
+           }
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = regs[2] = 0;
+           }
+           break;
+       case MIPS_J:                    /* 02 */
+       case MIPS_JAL:                  /* 03 */
+           if (mask & jit_class_gpr)
+               regs[0] = regs[1] = regs[2] = 0;
+           break;
+       case MIPS_LUI:                  /* 0f */
+           assert(i.rs.b == 0);
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rt.b;
+               regs[1] = regs[2] = 0;
+           }
+           break;
+       case MIPS_SPECIAL2:             /* 1c */
+           switch (i.tc.b) {
+               case MIPS_CLZ:          /* 20 */
+               case MIPS_CLO:          /* 21 */
+               case MIPS_DCLZ:         /* 24 */
+               case MIPS_DCLO:         /* 25 */
+                   assert(!jit_mips6_p() && i.rt.b == i.rd.b && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rd.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               case MIPS_MUL:          /* 02 */
+                   assert(jit_mips2_p() && i.ic.b == 0);
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = i.rd.b;
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+       case MIPS_SPECIAL3:             /* 1f */
+           switch (i.tc.b) {
+               case MIPS_EXT:          /* 00 */
+               case MIPS_DEXTM:        /* 01 */
+               case MIPS_DEXTU:        /* 02 */
+               case MIPS_DEXT:         /* 03 */
+               case MIPS_INS:          /* 04 */
+               case MIPS_DINSM:        /* 05 */
+               case MIPS_DINSU:        /* 06 */
+               case MIPS_DINS:         /* 07 */
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               /* BITSWAP */
+               case MIPS_BSHFL:        /* 20 */
+               /* DBITSWAP */
+               case MIPS_DBSHFL:       /* 24 */
+                   switch (i.ic.b) {
+                       case MIPS_WSBH: /* 02 */
+                       case MIPS_SEB:  /* 10 */
+                       case MIPS_SEH:  /* 18 */
+                           if (mask & jit_class_gpr) {
+                               regs[0] = i.rt.b;
+                               regs[1] = i.rd.b;
+                               regs[2] = 0;
+                           }
+                           break;
+                       /* BITSWAP DBITSWAP */
+                       case 0:
+                           assert(jit_mips6_p() && i.rt.b == 0);
+                           if (mask & jit_class_gpr) {
+                               regs[0] = i.rs.b;
+                               regs[1] = i.rd.b;
+                               regs[2] = 0;
+                           }
+                           break;
+                       default:
+                           abort();
+                   }
+                   break;
+               /* SC */
+               case 38:                /* 26 */
+               /* SCD */
+               case 39:                /* 27 */
+               /* LD */
+               case 54:                /* 36 */
+               /* LLD */
+               case 55:                /* 37 */
+                   assert(jit_mips6_p());
+                   if (mask & jit_class_gpr) {
+                       regs[0] = i.rs.b;
+                       regs[1] = i.rt.b;
+                       regs[2] = 0;
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+       case MIPS_COP1:                 /* 11 */
+           switch (i.tc.b) {
+               case MIPS_ADD_fmt:      /* 00 */
+                   switch (i.rs.b) {
+                       case MIPS_MF:   /* 00 */
+                       case MIPS_DMF:  /* 01 */
+                       case MIPS_MFH:  /* 03 */
+                       case MIPS_MT:   /* 04 */
+                       case MIPS_DMT:  /* 05 */
+                       case MIPS_MTH:  /* 07 */
+                           assert(i.ic.b == 0);
+                           if (mask & jit_class_gpr) {
+                               regs[0] = i.rt.b;
+                               regs[1] = regs[2] = 0;
+                           }
+                           else
+                               regs[0] = i.rd.b;
+                           break;
+                       default:
+                           goto three_fprs;
+                   }
+                   break;
+               case MIPS_SUB_fmt:      /* 01 */
+               case MIPS_MUL_fmt:      /* 02 */
+               case MIPS_DIV_fmt:      /* 03 */
+               three_fprs:
+                                       /* 10 */
+                   assert(i.rs.b == MIPS_fmt_S ||
+                                       /* 11 */
+                          i.rs.b == MIPS_fmt_D);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.rt.b;
+                       regs[1] = i.rd.b;
+                       regs[2] = i.ic.b;
+                   }
+                   break;
+               case MIPS_SQRT_fmt:     /* 04 */
+               case MIPS_ABS_fmt:      /* 05 */
+               case MIPS_MOV_fmt:      /* 06 */
+               case MIPS_NEG_fmt:      /* 07 */
+                   assert((i.rs.b == MIPS_fmt_S || i.rs.b == MIPS_fmt_D) &&
+                          i.rt.b == 0);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.rd.b;
+                       regs[1] = i.ic.b;
+                   }
+                   break;
+               case MIPS_CVT_fmt_S:    /* 20 */
+               case MIPS_CVT_fmt_D:    /* 21 */
+               case MIPS_CVT_fmt_W:    /* 24 */
+               case MIPS_CVT_fmt_L:    /* 25 */
+                   switch (i.rs.b) {
+                       case MIPS_fmt_S:/* 10 */
+                       case MIPS_fmt_D:/* 11 */
+                       case MIPS_fmt_W:/* 14 */
+                       case MIPS_fmt_L:/* 15 */
+                           break;
+                       default:
+                           abort();
+                   }
+                   assert(i.rt.b == 0);
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.rd.b;
+                       regs[1] = i.ic.b;
+                   }
+                   break;
+               case MIPS_cond_F:       /* 30 */
+               case MIPS_cond_UN:      /* 31 */
+               case MIPS_cond_EQ:      /* 32 */
+               case MIPS_cond_UEQ:     /* 33 */
+               case MIPS_cond_OLT:     /* 34 */
+               case MIPS_cond_ULT:     /* 35 */
+               case MIPS_cond_OLE:     /* 36 */
+               case MIPS_cond_ULE:     /* 37 */
+               case MIPS_cond_SF:      /* 38 */
+               case MIPS_cond_NGLE:    /* 39 */
+               case MIPS_cond_SEQ:     /* 3a */
+               case MIPS_cond_NGL:     /* 3b */
+               case MIPS_cond_LT:      /* 3c */
+               case MIPS_cond_NGE:     /* 3d */
+               case MIPS_cond_LE:      /* 3e */
+               case MIPS_cond_UGT:     /* 3f */
+                   assert(!jit_mips6_p() &&
+                                       /* 10 */
+                          (i.fm.b == MIPS_fmt_S ||
+                                       /* 11 */
+                           i.fm.b == MIPS_fmt_D));
+                   if (mask & jit_class_gpr)
+                       regs[0] = regs[1] = regs[2] = 0;
+                   else {
+                       regs[0] = i.ft.b;
+                       regs[1] = i.fs.b;
+                   }
+                   break;
+               default:
+                   switch (i.ic.b) {
+                       case MIPS_cmp_AF:  /* 00 */
+                       case MIPS_cmp_UN:  /* 01 */
+                       case MIPS_cmp_EQ:  /* 02 */
+                       case MIPS_cmp_UEQ: /* 03 */
+                       case MIPS_cmp_LT:  /* 04 */
+                       case MIPS_cmp_ULT: /* 05 */
+                       case MIPS_cmp_LE:  /* 06 */
+                       case MIPS_cmp_ULE: /* 07 */
+                       case MIPS_cmp_SAF: /* 08 */
+                       case MIPS_cmp_SUN: /* 09 */
+                       case MIPS_cmp_SEQ: /* 0a */
+                       case MIPS_cmp_SUEQ:/* 0b */
+                       case MIPS_cmp_SLT: /* 0c */
+                       case MIPS_cmp_SULT:/* 0d */
+                       case MIPS_cmp_SLE: /* 0e */
+                       case MIPS_cmp_SULE:/* 0f */
+                           assert(jit_mips6_p() &&
+                                          /* 14 */
+                                  (i.rs.b == MIPS_condn_S ||
+                                          /* 15 */
+                                   i.rs.b == MIPS_condn_D));
+                           if (mask & jit_class_gpr)
+                               regs[0] = regs[1] = regs[2] = 0;
+                           else {
+                               regs[0] = i.ft.b;
+                               regs[1] = i.fs.b;
+                               regs[2] = i.fd.b;
+                           }
+                           goto done;
+                       default:
+                           break;
+                   }
+                   switch (i.rt.b) {
+                       case MIPS_BC:   /* 08 */
+                           assert(!jit_mips6_p() &&
+                                       /* 00 */
+                                  (i.rs.b == MIPS_BCF ||
+                                       /* 01 */
+                                   i.rs.b == MIPS_BCT));
+                           if (mask & jit_class_gpr)
+                               regs[0] = regs[1] = regs[2] = 0;
+                           else {
+                               regs[0] = i.rt.b;
+                               regs[1] = i.rd.b;
+                           }
+                           break;
+                       case MIPS_BC1EQZ:/* 09 */
+                       case MIPS_BC1NEZ:/* 0a */
+                           assert(jit_mips6_p());
+                           if (mask & jit_class_gpr)
+                               regs[0] = regs[1] = regs[2] = 0;
+                           else
+                               regs[0] = i.rt.b;
+                           break;
+                       default:
+                           abort();
+                   }
+                   break;
+           }
+           break;
+       case MIPS_ADDIU:                /* 09 */
+       case MIPS_SLTI:                 /* 0a */
+       case MIPS_SLTIU:                /* 0b */
+       case MIPS_ANDI:                 /* 0c */
+       case MIPS_ORI:                  /* 0d */
+       case MIPS_XORI:                 /* 0e */
+       case MIPS_DADDIU:               /* 18 */
+       case MIPS_LB:                   /* 20 */
+       case MIPS_LH:                   /* 21 */
+       case MIPS_LW:                   /* 23 */
+       case MIPS_LBU:                  /* 24 */
+       case MIPS_LHU:                  /* 25 */
+       case MIPS_LWU:                  /* 27 */
+       case MIPS_SB:                   /* 28 */
+       case MIPS_SH:                   /* 29 */
+       case MIPS_SW:                   /* 2b */
+       case MIPS_LD:                   /* 37 */
+       case MIPS_SD:                   /* 3f */
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = i.rt.b;
+               regs[2] = 0;
+           }
+           break;
+       case MIPS_LL:                   /* 30 */
+       case MIPS_LLD:                  /* 34 */
+       case MIPS_SC:                   /* 38 */
+       case MIPS_SCD:                  /* 3c */
+           assert(!jit_mips6_p() && i.ic.b == 0);
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = i.rt.b;
+               regs[2] = 0;
+           }
+           break;
+       case MIPS_BLEZ:                 /* 06 */
+       case MIPS_BGTZ:                 /* 07 */
+           assert(i.rt.b == 0);
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = regs[2] = 0;
+           }
+           break;
+       case MIPS_BEQ:                  /* 04 */
+       case MIPS_BNE:                  /* 05 */
+           assert(i.rt.b == 0);
+           /* fallthrough */
+       case MIPS_LWC1:                 /* 31 */
+       case MIPS_LDC1:                 /* 35 */
+       case MIPS_SWC1:                 /* 39 */
+       case MIPS_SDC1:                 /* 3d */
+           if (mask & jit_class_gpr) {
+               regs[0] = i.rs.b;
+               regs[1] = i.rt.b;
+               regs[2] = 0;
+           }
+           else
+               regs[0] = i.rt.b;
+           break;
+       default:
+           abort();
+    }
+done:
+    /* If cannot move instruction to delay slot */
+    if (_jitc->inst.pend &&
+       ((((mask & jit_class_fpr) || reg0) &&
+         (reg0 == regs[0] || reg0 == regs[1] || reg0 == regs[2])) ||
+        (((mask & jit_class_fpr) || reg1) &&
+         (reg1 == regs[0] || reg1 == regs[1] || reg1 == regs[2])))) {
+       flush();
+    }
+    /* Get a temporary register */
+retry:
+    reg = jit_get_reg(mask|jit_class_nospill);
+    /* Make sure will not use a register in use by delay slot */
+    if (_jitc->inst.pend) {
+       /* Hold each clashing register while asking for another one, so
+        * the allocator cannot hand the same register back; at most
+        * three can clash because regs[] has three entries. */
+       if (rn(reg) == regs[0] ||
+           rn(reg) == regs[1] || rn(reg) == regs[2]) {
+           r0 = reg;
+           reg = jit_get_reg(mask|jit_class_nospill);
+           if (rn(reg) == regs[0] ||
+               rn(reg) == regs[1] || rn(reg) == regs[2]) {
+               r1 = reg;
+               reg = jit_get_reg(mask|jit_class_nospill);
+               if (rn(reg) == regs[0] ||
+                   rn(reg) == regs[1] || rn(reg) == regs[2]) {
+                   r2 = reg;
+                   reg = jit_get_reg(mask|jit_class_nospill);
+                   jit_unget_reg(r2);
+               }
+               jit_unget_reg(r1);
+           }
+           jit_unget_reg(r0);
+       }
+    }
+    if (reg == JIT_NOREG) {
+       /* Cannot get a register to optimize delay slot */
+       flush();
+       /* Must find a free register */
+       if (!(mask & jit_class_chk))
+           goto retry;
+    }
+    assert(reg != JIT_NOREG || (mask & jit_class_chk));
+    return (reg);
+}
+
 static void
 _hrrrit(jit_state_t *_jit,jit_int32_t hc,
        jit_int32_t rs, jit_int32_t rt, jit_int32_t rd,
 static void
 _hrrrit(jit_state_t *_jit,jit_int32_t hc,
        jit_int32_t rs, jit_int32_t rt, jit_int32_t rd,
@@ -789,7 +1430,7 @@ _hrrrit(jit_state_t *_jit,jit_int32_t hc,
     i.rt.b = rt;
     i.rs.b = rs;
     i.hc.b = hc;
     i.rt.b = rt;
     i.rs.b = rs;
     i.hc.b = hc;
-    ii(i.op);
+    instr(i.op);
 }
 
 static void
 }
 
 static void
@@ -802,7 +1443,21 @@ _hrri(jit_state_t *_jit, jit_int32_t hc,
     i.rt.b = rt;
     i.rs.b = rs;
     i.hc.b = hc;
     i.rt.b = rt;
     i.rs.b = rs;
     i.hc.b = hc;
-    ii(i.op);
+    instr(i.op);
+}
+
+/*
+ * Build an instruction word from a major opcode (hc), two register
+ * fields (rs, rt), a value for the i9 field and a function code (tc),
+ * then hand the word to instr().
+ */
+static void
+_hrri9(jit_state_t *_jit, jit_int32_t hc,
+      jit_int32_t rs, jit_int32_t rt, jit_int32_t i9, jit_int32_t tc)
+{
+    jit_instr_t                i;
+    /* start from an all-zero word so bits not set below stay clear */
+    i.op = 0;
+    i.tc.b = tc;
+    i.i9.b = i9;
+    i.rt.b = rt;
+    i.rs.b = rs;
+    i.hc.b = hc;
+    instr(i.op);
 }
 
 static void
 }
 
 static void
@@ -811,7 +1466,7 @@ _hi(jit_state_t *_jit, jit_int32_t hc, jit_int32_t im)
     jit_instr_t                i;
     i.ii.b = im;
     i.hc.b = hc;
     jit_instr_t                i;
     i.ii.b = im;
     i.hc.b = hc;
-    ii(i.op);
+    instr(i.op);
 }
 
 static void
 }
 
 static void
@@ -854,6 +1509,121 @@ _insr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
         DINS(r0, r1, pos, size);
 }
 
         DINS(r0, r1, pos, size);
 }
 
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2
+unsigned int mask = ~0;
+while ((s >>= 1) > 0)
+{
+  mask ^= (mask << s);
+  v = ((v >> s) & mask) | ((v << s) & ~mask);
+}
+*/
+/*
+ * Emit code implementing the loop quoted above: copy r1 into v and
+ * reverse the order of its bits in place.  Four temporary registers
+ * (s, mask, t0, t1) are taken with jit_get_reg() and released before
+ * returning.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t v, jit_int32_t r1)
+{
+    jit_int32_t                s, mask;
+    /* t0 and t1 hold values returned by jit_get_reg(), like s and mask */
+    jit_word_t         loop, done, t0, t1;
+    movr(v, r1);
+    s = jit_get_reg(jit_class_gpr);
+    movi(rn(s), __WORDSIZE);                   /* s = sizeof(v) * CHAR_BIT; */
+    mask = jit_get_reg(jit_class_gpr);
+    movi(rn(mask), ~0L);                       /* mask = ~0; */
+    /* write out any pending instruction so that the address recorded
+     * for the loop head is the real start of the loop body */
+    flush();
+    loop = _jit->pc.w;                         /* while ((s >>= 1) > 0) */
+    rshi(rn(s), rn(s), 1);                     /*        (s >>= 1) */
+    done = blei(_jit->pc.w, rn(s), 0);         /* no loop if s <= 0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    lshr(rn(t0), rn(mask), rn(s));             /* t0 = (mask << s) */
+    xorr(rn(mask), rn(mask), rn(t0));          /* mask ^= t0 */
+    rshr(rn(t0), v, rn(s));                    /* t0 = v >> s */
+    andr(rn(t0), rn(t0), rn(mask));            /* t0 = t0 & mask */
+    t1 = jit_get_reg(jit_class_gpr);
+    lshr(rn(t1), v, rn(s));                    /* t1 = v << s */
+    /* v has been consumed into t0 and t1, so it is reused for ~mask */
+    comr(v, rn(mask));                         /* v = ~mask */
+    andr(rn(t1), v, rn(t1));                   /* t1 = t1 & v */
+    orr(v, rn(t0), rn(t1));                    /* v = t0 | t1 */
+    jmpi(loop, 0);
+    /* flush again so the exit branch is patched to the address that
+     * follows everything emitted for the loop */
+    flush();
+    patch_at(done, _jit->pc.w);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    jit_unget_reg(mask);
+    jit_unget_reg(s);
+}
+
+/*
+ * Emit the CLO-family instruction with destination r0 and source r1:
+ * the 32-bit form when __WORDSIZE is 32, else the 64-bit (D) form, and
+ * the _R6 encoding when jit_mips6_p() is true.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips6_p()) {
+#if __WORDSIZE == 32
+       CLO_R6(r0, r1);
+#else
+       DCLO_R6(r0, r1);
+#endif
+    }
+    else {
+#if __WORDSIZE == 32
+       CLO(r0, r1);
+#else
+       DCLO(r0, r1);
+#endif
+    }
+}
+
+/*
+ * Emit the CLZ-family instruction with destination r0 and source r1:
+ * the 32-bit form when __WORDSIZE is 32, else the 64-bit (D) form, and
+ * the _R6 encoding when jit_mips6_p() is true.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_mips6_p()) {
+#if __WORDSIZE == 32
+       CLZ_R6(r0, r1);
+#else
+       DCLZ_R6(r0, r1);
+#endif
+    }
+    else {
+#if __WORDSIZE == 32
+       CLZ(r0, r1);
+#else
+       DCLZ(r0, r1);
+#endif
+    }
+}
+
+/*
+ * Bit-reverse r1 into r0, then apply the CLO-family operation to r0.
+ * Without release 6 this is bitswap() followed by clor(); with it,
+ * BITSWAP/DBITSWAP plus a byte swap is used before CLO_R6/DCLO_R6.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_mips6_p()) {
+       bitswap(r0, r1);
+       clor(r0, r0);
+       return;
+    }
+#if __WORDSIZE == 32
+    BITSWAP(r0, r1);
+    bswapr_ui(r0, r0);
+    CLO_R6(r0, r0);
+#else
+    DBITSWAP(r0, r1);
+    bswapr_ul(r0, r0);
+    DCLO_R6(r0, r0);
+#endif
+}
+
+/*
+ * Bit-reverse r1 into r0, then apply the CLZ-family operation to r0.
+ * Without release 6 this is bitswap() followed by clzr(); with it,
+ * BITSWAP/DBITSWAP plus a byte swap is used before CLZ_R6/DCLZ_R6.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_mips6_p()) {
+       bitswap(r0, r1);
+       clzr(r0, r0);
+       return;
+    }
+#if __WORDSIZE == 32
+    BITSWAP(r0, r1);
+    bswapr_ui(r0, r0);
+    CLZ_R6(r0, r0);
+#else
+    DBITSWAP(r0, r1);
+    bswapr_ul(r0, r0);
+    DCLZ_R6(r0, r0);
+#endif
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -1048,11 +1818,15 @@ _rsbi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    if (jit_mips2_p() && __WORDSIZE == 32)
-       MUL(r0, r1, r2);
+    if (jit_mips6_p())
+       mul_r6(r0, r1, r2);
     else {
     else {
-        multu(r1, r2);
-        MFLO(r0);
+       if (jit_mips2_p() && __WORDSIZE == 32)
+           MUL(r0, r1, r2);
+       else {
+           multu(r1, r2);
+           MFLO(r0);
+       }
     }
 }
 
     }
 }
 
@@ -1071,12 +1845,38 @@ static void
 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
 {
 _iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
 {
-    if (sign)
-       mult(r2, r3);
-    else
-       multu(r2, r3);
-    MFLO(r0);
-    MFHI(r1);
+    jit_int32_t                t0;
+    if (jit_mips6_p()) {
+       if (r0 == r2 || r0 == r3) {
+           t0 = jit_get_reg(jit_class_gpr);
+           if (sign)
+               mul_r6(rn(t0), r2, r3);
+           else
+               mulu_r6(rn(t0), r2, r3);
+       }
+       else {
+           if (sign)
+               mul_r6(r0, r2, r3);
+           else
+               mulu_r6(r0, r2, r3);
+       }
+       if (sign)
+           muh_r6(r1, r2, r3);
+       else
+           muhu_r6(r1, r2, r3);
+       if (r0 == r2 || r0 == r3) {
+           movr(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+    }
+    else {
+       if (sign)
+           mult(r2, r3);
+       else
+           multu(r2, r3);
+       MFLO(r0);
+       MFHI(r1);
+    }
 }
 
 static void
 }
 
 static void
@@ -1093,8 +1893,12 @@ _iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    div(r1, r2);
-    MFLO(r0);
+    if (jit_mips6_p())
+       div_r6(r0, r1, r2);
+    else {
+       div(r1, r2);
+       MFLO(r0);
+    }
 }
 
 static void
 }
 
 static void
@@ -1110,8 +1914,12 @@ _divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divu(r1, r2);
-    MFLO(r0);
+    if (jit_mips6_p())
+       divu_r6(r0, r1, r2);
+    else {
+       divu(r1, r2);
+       MFLO(r0);
+    }
 }
 
 static void
 }
 
 static void
@@ -1128,12 +1936,39 @@ static void
 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
 {
 _iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
 {
-    if (sign)
-       div(r2, r3);
-    else
-       divu(r2, r3);
-    MFLO(r0);
-    MFHI(r1);
+    jit_int32_t                t0;
+    if (jit_mips6_p()) {
+       if (r0 == r2 || r0 == r3)
+           t0 = jit_get_reg(jit_class_gpr);
+       else
+           t0 = _NOREG;
+       if (sign) {
+           if (t0 == _NOREG)
+               div_r6(r0, r2, r3);
+           else
+               div_r6(rn(t0), r2, r3);
+           mod_r6(r1, r2, r3);
+       }
+       else {
+           if (t0 == _NOREG)
+               divu_r6(r0, r2, r3);
+           else
+               divu_r6(rn(t0), r2, r3);
+           modu_r6(r1, r2, r3);
+       }
+       if (t0 != _NOREG) {
+           movr(r0, rn(t0));
+           jit_unget_reg(t0);
+       }
+    }
+    else {
+       if (sign)
+           div(r2, r3);
+       else
+           divu(r2, r3);
+       MFLO(r0);
+       MFHI(r1);
+    }
 }
 
 static void
 }
 
 static void
@@ -1150,8 +1985,12 @@ _iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
 static void
 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    div(r1, r2);
-    MFHI(r0);
+    if (jit_mips6_p())
+       mod_r6(r0, r1, r2);
+    else {
+       div(r1, r2);
+       MFHI(r0);
+    }
 }
 
 static void
 }
 
 static void
@@ -1167,8 +2006,12 @@ _remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    divu(r1, r2);
-    MFHI(r0);
+    if (jit_mips6_p())
+       modu_r6(r0, r1, r2);
+    else {
+       divu(r1, r2);
+       MFHI(r0);
+    }
 }
 
 static void
 }
 
 static void
@@ -1322,7 +2165,7 @@ static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         w;
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         w;
-
+    flush();
     w = _jit->pc.w;
 #  if __WORDSIZE == 32
     LUI(r0, i0 >> 16);
     w = _jit->pc.w;
 #  if __WORDSIZE == 32
     LUI(r0, i0 >> 16);
@@ -1339,6 +2182,36 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
+static void
+_movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_gpr);
+       SELNEZ(rn(reg), r1, r2);
+       SELEQZ(r0, r0, r2);
+       OR(r0, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       MOVN(r0, r1, r2);
+}
+
+static void
+_movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_gpr);
+       SELEQZ(rn(reg), r1, r2);
+       SELNEZ(r0, r0, r2);
+       OR(r0, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    else
+       MOVZ(r0, r1, r2);
+}
+
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
@@ -1352,27 +2225,37 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
     }
     SYNC();
     /* retry: */
     }
     SYNC();
     /* retry: */
+    flush();
     retry = _jit->pc.w;
 #  if __WORDSIZE == 32
     retry = _jit->pc.w;
 #  if __WORDSIZE == 32
-    LL(r0, 0, r1);
+    if (jit_mips6_p()) LL_R6(r0, 0, r1);
+    else               LL(r0, 0, r1);
 #  else
 #  else
-    LLD(r0, 0, r1);
+    if (jit_mips6_p()) LLD_R6(r0, 0, r1);
+    else               LLD(r0, 0, r1);
 #  endif
 #  endif
+    flush();
     jump0 = _jit->pc.w;
     BNE(r0, r2, 1);                            /* bne done r0 r2 */
     movi(r0, 0);                               /* set to 0 in delay slot */
     jump0 = _jit->pc.w;
     BNE(r0, r2, 1);                            /* bne done r0 r2 */
     movi(r0, 0);                               /* set to 0 in delay slot */
+    flush();
     movr(r0, r3);                              /* after jump and delay slot */
     /* store new value */
 #  if __WORDSIZE == 32
     movr(r0, r3);                              /* after jump and delay slot */
     /* store new value */
 #  if __WORDSIZE == 32
-    SC(r0, 0, r1);
+    if (jit_mips6_p()) SC_R6(r0, 0, r1);
+    else               SC(r0, 0, r1);
 #  else
 #  else
-    SCD(r0, 0, r1);
+    if (jit_mips6_p()) SCD_R6(r0, 0, r1);
+    else               SCD(r0, 0, r1);
 #  endif
 #  endif
+    flush();
     jump1 = _jit->pc.w;
     BEQ(r0, _ZERO_REGNO, 0);                   /* beqi retry r0 0 */
     movi(r0, 1);                               /* set to 1 in delay slot */
     jump1 = _jit->pc.w;
     BEQ(r0, _ZERO_REGNO, 0);                   /* beqi retry r0 0 */
     movi(r0, 1);                               /* set to 1 in delay slot */
+    flush();
     SYNC();
     /* done: */
     SYNC();
     /* done: */
+    flush();
     done = _jit->pc.w;
     patch_at(jump0, done);
     patch_at(jump1, retry);
     done = _jit->pc.w;
     patch_at(jump0, done);
     patch_at(jump1, retry);
@@ -1483,120 +2366,90 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
 static void
 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_c(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_c(r0, r0);
 }
 
 static void
 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LB(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LB(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_c(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_c(r0, r0);
     }
 }
 
 static void
 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
     }
 }
 
 static void
 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_uc(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_uc(r0, r0);
 }
 
 static void
 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LBU(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LBU(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_uc(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_uc(r0, r0);
     }
 }
 
 static void
 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
     }
 }
 
 static void
 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_s(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_s(r0, r0);
 }
 
 static void
 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LH(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LH(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_s(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_s(r0, r0);
     }
 }
 
 static void
 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
     }
 }
 
 static void
 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_us(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_us(r0, r0);
 }
 
 static void
 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LHU(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LHU(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_us(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_us(r0, r0);
     }
 }
 
 static void
 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
     }
 }
 
 static void
 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_i(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_i(r0, r0);
 }
 
 static void
 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LW(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LW(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_i(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_i(r0, r0);
     }
 }
 
     }
 }
 
@@ -1604,48 +2457,36 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
 static void
 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_ui(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_ui(r0, r0);
 }
 
 static void
 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LWU(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LWU(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_ui(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_ui(r0, r0);
     }
 }
 
 static void
 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
     }
 }
 
 static void
 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1 ,jit_int32_t r2)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    addr(rn(reg), r1, r2);
-    ldr_l(r0, rn(reg));
-    jit_unget_reg(reg);
+    addr(r0, r1, r2);
+    ldr_l(r0, r0);
 }
 
 static void
 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 }
 
 static void
 _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
        LD(r0, i0, r1);
     else {
     if (can_sign_extend_short_p(i0))
        LD(r0, i0, r1);
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_l(r0, rn(reg));
-       jit_unget_reg(reg);
+       addi(r0, r1, i0);
+       ldr_l(r0, r0);
     }
 }
 #endif
     }
 }
 #endif
@@ -1948,8 +2789,7 @@ static void
 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     subr(r0, r1, r2);
 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     subr(r0, r1, r2);
-    SLTU(r0, _ZERO_REGNO, r0);
-    XORI(r0, r0, 1);
+    SLTIU(r0, r0, 1);
 }
 
 static void
 }
 
 static void
@@ -1957,11 +2797,10 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     if (i0) {
        subi(r0, r1, i0);
 {
     if (i0) {
        subi(r0, r1, i0);
-       SLTU(r0, _ZERO_REGNO, r0);
+       SLTIU(r0, r0, 1);
+    } else {
+       SLTIU(r0, r1, 1);
     }
     }
-    else
-       SLTU(r0, _ZERO_REGNO, r1);
-    XORI(r0, r0, 1);
 }
 
 static void
 }
 
 static void
@@ -2059,173 +2898,19 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 
 static jit_word_t
 }
 
 static jit_word_t
-_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr);
-    SLT(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_word_t         d;
-    jit_int32_t                reg;
-    jit_bool_t         zero_p;
-
-    if (!(zero_p = i1 == 0))
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       if (!zero_p)
-           SLTI(rn(reg), r0, i1);
-       w = _jit->pc.w;
-       d = ((i0 - w) >> 2) - 1;
-       if (!zero_p)
-           BNE(rn(reg), _ZERO_REGNO, d);
-       else
-           BLTZ(r0, d);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bltr(i0, r0, rn(reg));
-    }
-    if (!zero_p)
-       jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       SLTIU(rn(reg), r0, i1);
-       w = _jit->pc.w;
-       BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bltr_u(i0, r0, rn(reg));
-    }
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r1, r0);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r1, r0);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BLEZ(r0, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bler(i0, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-
-    return (w);
-}
-
-static jit_word_t
-_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bler_u(i0, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-
-    return (w);
-}
-
-static jit_word_t
-_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-
+    jit_int32_t                op, reg;
+    /* Just to not move incorrectly instruction to delay slot */
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1);
+    op = pending();
+    /* implicit flush() */
     w = _jit->pc.w;
     BEQ(r0, r1, ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BEQ(r0, r1, ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
+    if (reg != JIT_NOREG)
+       jit_unget_reg(reg);
     return (w);
 }
 
     return (w);
 }
 
@@ -2233,179 +2918,168 @@ static jit_word_t
 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
+    jit_int32_t                op, reg;
+    if (i1 == 0)
+       w = beqr(i0, r0, _ZERO_REGNO);
     else {
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+       op = pending();
        movi(rn(reg), i1);
        movi(rn(reg), i1);
-       w = beqr(i0, r0, rn(reg));
+       flush();
+       w = _jit->pc.w;
+       BEQ(r0, rn(reg), ((i0 - w) >> 2) - 1);
+       delay(op);
        jit_unget_reg(reg);
     }
        jit_unget_reg(reg);
     }
-
     return (w);
 }
 
 static jit_word_t
     return (w);
 }
 
 static jit_word_t
-_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+      jit_bool_t sltu)
 {
     jit_word_t         w;
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r0, r1);
+    jit_int32_t                op, reg;
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
+    if (sltu)
+       SLTU(rn(reg), r0, r1);
+    else
+       SLT(rn(reg), r0, r1);
+    flush();
     w = _jit->pc.w;
     BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
+    delay(op);
     jit_unget_reg(reg);
     jit_unget_reg(reg);
-
     return (w);
 }
 
 static jit_word_t
     return (w);
 }
 
 static jit_word_t
-_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+      jit_bool_t sltiu, jit_bool_t bne)
 {
     jit_word_t         w;
     jit_word_t         d;
 {
     jit_word_t         w;
     jit_word_t         d;
-    jit_int32_t                reg;
     jit_bool_t         zero_p;
     jit_bool_t         zero_p;
-
-    if (!(zero_p = i1 == 0))
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                op, t0, mask;
+    zero_p = !sltiu && i1 == 0;
+    /* Even if zero_p allocate one as a mean to avoid incorrect delay slot */
+    mask = jit_class_gpr;
+    if (zero_p)
+       mask |= jit_class_chk;
+    t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO);
     if (can_sign_extend_short_p(i1)) {
     if (can_sign_extend_short_p(i1)) {
-       if (!zero_p)
-           SLTI(rn(reg), r0, i1);
+       op = pending();
+       if (!zero_p) {
+           if (sltiu)
+               SLTIU(rn(t0), r0, i1);
+           else
+               SLTI(rn(t0), r0, i1);
+        }
+       flush();
        w = _jit->pc.w;
        d = ((i0 - w) >> 2) - 1;
        w = _jit->pc.w;
        d = ((i0 - w) >> 2) - 1;
-       if (!zero_p)
-           BEQ(rn(reg), _ZERO_REGNO, d);
-       else
-           BGEZ(r0, d);
-       NOP(1);
+       if (bne) {
+           if (!zero_p)
+               BNE(rn(t0), _ZERO_REGNO, d);
+           else
+               BLTZ(r0, d);
+       }
+       else {
+           if (!zero_p)
+               BEQ(rn(t0), _ZERO_REGNO, d);
+           else
+               BGEZ(r0, d);
+       }
     }
     else {
     }
     else {
-       movi(rn(reg), i1);
-       w = bger(i0, r0, rn(reg));
-    }
-    if (!zero_p)
-       jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       SLTIU(rn(reg), r0, i1);
+       op = pending();
+       movi(rn(t0), i1);
+       if (sltiu)
+           SLTU(rn(t0), r0, rn(t0));
+        else
+           SLT(rn(t0), r0, rn(t0));
+       flush();
        w = _jit->pc.w;
        w = _jit->pc.w;
-       BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bger_u(i0, r0, rn(reg));
+       if (bne)
+           BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       else
+           BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     }
     }
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r1, r0);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (w);
 }
 
 static jit_word_t
     return (w);
 }
 
 static jit_word_t
-_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+      jit_bool_t sltu, jit_bool_t inv)
 {
     jit_word_t         w;
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r1, r0);
+    jit_int32_t                op, reg;
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
+    if (sltu)
+       SLTU(rn(reg), r1, r0);
+    else
+       SLT(rn(reg), r1, r0);
+    flush();
     w = _jit->pc.w;
     w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
+    if (inv)
+       BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    else
+       BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+    delay(op);
     jit_unget_reg(reg);
     jit_unget_reg(reg);
-
     return (w);
 }
 
 static jit_word_t
     return (w);
 }
 
 static jit_word_t
-_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+      jit_bool_t sltiu, jit_bool_t inv)
 {
     jit_word_t         w;
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
+    jit_int32_t                op, t0, mask;
+    mask = jit_class_gpr;
+    if (i0 == 0)
+       mask |= jit_class_chk;
+    /* Allocate even if i0 == 0 as a way to avoid incorrect delay slot */
+    t0 = jit_get_reg_for_delay_slot(mask, r0, _ZERO_REGNO);
     if (i1 == 0) {
     if (i1 == 0) {
+       op = pending();
+       /* implicit flush() */
        w = _jit->pc.w;
        w = _jit->pc.w;
-       BGTZ(r0, ((i0 - w) >> 2) - 1);
-       NOP(1);
+       if (inv) {
+           if (sltiu)
+               BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+           else
+               BLEZ(r0, ((i0 - w) >> 2) - 1);
+       }
+       else {
+           if (sltiu)
+               BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+           else
+               BGTZ(r0, ((i0 - w) >> 2) - 1);
+       }
     }
     else {
     }
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bgtr(i0, r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-
-    return (w);
-}
-
-static jit_word_t
-_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
+       op = pending();
+       movi(rn(t0), i1);
+       if (sltiu)
+           SLTU(rn(t0), rn(t0), r0);
+       else
+           SLT(rn(t0), rn(t0), r0);
+       flush();
        w = _jit->pc.w;
        w = _jit->pc.w;
-       BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i1);
-       w = bgtr_u(i0, r0, rn(reg));
-       jit_unget_reg(reg);
+       if (inv)
+           BEQ(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       else
+           BNE(rn(t0), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     }
     }
-
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (w);
 }
 
     return (w);
 }
 
@@ -2413,11 +3087,16 @@ static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-
+    jit_int32_t                op, reg;
+    /* Just to not move incorrectly instruction to delay slot */
+    reg = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk, r0, r1);
+    op = pending();
+    /* implicit flush() */
     w = _jit->pc.w;
     BNE(r0, r1, ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BNE(r0, r1, ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
+    if (reg != JIT_NOREG)
+       jit_unget_reg(reg);
     return (w);
 }
 
     return (w);
 }
 
@@ -2425,48 +3104,85 @@ static jit_word_t
 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
-
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       BNE(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
+    jit_int32_t                op, reg;
+    if (i1 == 0)
+       w = bner(i0, r0, _ZERO_REGNO);
     else {
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+       reg = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+       op = pending();
        movi(rn(reg), i1);
        movi(rn(reg), i1);
-       w = bner(i0, r0, rn(reg));
+       flush();
+       w = _jit->pc.w;
+       BNE(r0, rn(reg), ((i0 - w) >> 2) - 1);
+       delay(op);
        jit_unget_reg(reg);
     }
        jit_unget_reg(reg);
     }
-
     return (w);
 }
 
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
     return (w);
 }
 
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
+    jit_int32_t                op, t0;
+    /* make sure delay slot does not use r0 */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                   r0, _ZERO_REGNO);
+    op = pending();
     JR(r0);
     JR(r0);
-    NOP(1);
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
-_jmpi(jit_state_t *_jit, jit_word_t i0)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch)
+{
+    jit_int32_t                op, t0;
+    jit_word_t         w, disp;
+    /* try to get a pending instruction before the jump */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, _ZERO_REGNO, _ZERO_REGNO);
+    op = pending();
+    /* implicit flush() */
     w = _jit->pc.w;
     w = _jit->pc.w;
-    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
-       J((i0 & ~0xf0000000) >> 2);
-       NOP(1);
+    if (jit_mips2_p()) {
+       disp = ((i0 - w) >> 2) - 1;
+       if (patch || can_sign_extend_short_p(disp)) {
+           BEQ(_ZERO_REGNO, _ZERO_REGNO, disp);
+           goto done;
+       }
     }
     }
+    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000))
+       J((i0 & ~0xf0000000) >> 2);
     else {
     else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi_p(rn(reg), i0);
-       jmpr(rn(reg));
-       jit_unget_reg(reg);
+       if (patch)
+           w = movi_p(rn(t0), i0);
+       else
+           movi(rn(t0), i0);
+       JR(rn(t0));
     }
     }
+done:
+    delay(op);
+    jit_unget_reg(t0);
+    return (w);
+}
 
 
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         w;
+    jit_int32_t                op, t0;
+    /* make sure delay slot does not use _T9_REGNO */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                   _T9_REGNO, _ZERO_REGNO);
+    op = pending();
+    /* implicit flush() */
+    w = _jit->pc.w;
+    movi_p(rn(t0), i0);
+    flush();                   /* movi_p will be patched */
+    JR(rn(t0));
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (w);
 }
 
     return (w);
 }
 
@@ -2486,11 +3202,14 @@ _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
     addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (r0 == 0) t1 = t2 */
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     addr(r0, r0, r1);
     w = _jit->pc.w;
     BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     addr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2514,11 +3233,14 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, i1);
        SLT(rn(t2), r0, rn(t1));
        SLT(rn(t1), rn(t1), r0);
        addiu(rn(t1), r0, i1);
        SLT(rn(t2), r0, rn(t1));
        SLT(rn(t1), rn(t1), r0);
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, i1);
        w = _jit->pc.w;
        BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2543,10 +3265,13 @@ _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     addr(rn(t0), r0, r1);
     SLTU(rn(t1), rn(t0), r0);
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     addr(rn(t0), r0, r1);
     SLTU(rn(t1), rn(t0), r0);
+    flush();
+    /* cannot optimize delay slot */
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2564,10 +3289,13 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, i1);
        SLTU(rn(t1), rn(t0), r0);
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, i1);
        SLTU(rn(t1), rn(t0), r0);
+       flush();
+       /* cannot optimize delay slot */
        w = _jit->pc.w;
        BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
        w = _jit->pc.w;
        BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2596,11 +3324,14 @@ _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
     addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (t0 == 0) t1 = t2 */
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     addr(r0, r0, r1);
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     addr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2624,11 +3355,14 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, i1);
        SLT(rn(t2), r0, rn(t1));
        SLT(rn(t1), rn(t1), r0);
        addiu(rn(t1), r0, i1);
        SLT(rn(t2), r0, rn(t1));
        SLT(rn(t1), rn(t1), r0);
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, i1);
        w = _jit->pc.w;
        BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2653,10 +3387,13 @@ _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     addr(rn(t0), r0, r1);
     SLTU(rn(t1), rn(t0), r0);
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     addr(rn(t0), r0, r1);
     SLTU(rn(t1), rn(t0), r0);
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2674,10 +3411,13 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, i1);
        SLTU(rn(t1), rn(t0), r0);
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, i1);
        SLTU(rn(t1), rn(t0), r0);
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
        w = _jit->pc.w;
        BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2706,11 +3446,13 @@ _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
     subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (t0 == 0) t1 = t2 */
+    flush();
     w = _jit->pc.w;
     BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     subr(r0, r0, r1);
     w = _jit->pc.w;
     BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     subr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2734,11 +3476,13 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, -i1);
        SLT(rn(t2), rn(t1), r0);
        SLT(rn(t1), r0, rn(t1));
        addiu(rn(t1), r0, -i1);
        SLT(rn(t2), rn(t1), r0);
        SLT(rn(t1), r0, rn(t1));
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       flush();
        w = _jit->pc.w;
        BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, -i1);
        w = _jit->pc.w;
        BNE(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, -i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2763,10 +3507,13 @@ _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     subr(rn(t0), r0, r1);
     SLTU(rn(t1), r0, rn(t0));
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     subr(rn(t0), r0, r1);
     SLTU(rn(t1), r0, rn(t0));
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2784,10 +3531,13 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, -i1);
        SLTU(rn(t1), r0, rn(t0));
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, -i1);
        SLTU(rn(t1), r0, rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
        w = _jit->pc.w;
        BNE(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2816,11 +3566,14 @@ _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
     subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
+    movzr(rn(t1), rn(t2), rn(t0));     /* if (t0 == 0) t1 = t2 */
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     subr(r0, r0, r1);
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
     subr(r0, r0, r1);
+    flush();
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -2844,11 +3597,14 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        addiu(rn(t1), r0, -i1);
        SLT(rn(t2), rn(t1), r0);
        SLT(rn(t1), r0, rn(t1));
        addiu(rn(t1), r0, -i1);
        SLT(rn(t2), rn(t1), r0);
        SLT(rn(t1), r0, rn(t1));
-       MOVZ(rn(t1), rn(t2), rn(t0));
+       movzr(rn(t1), rn(t2), rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, -i1);
        w = _jit->pc.w;
        BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
        /* delay slot */
        addiu(r0, r0, -i1);
+       flush();
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
        jit_unget_reg(t2);
        jit_unget_reg(t1);
        jit_unget_reg(t0);
@@ -2873,10 +3629,13 @@ _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     subr(rn(t0), r0, r1);
     SLTU(rn(t1), r0, rn(t0));
     t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
     subr(rn(t0), r0, r1);
     SLTU(rn(t1), r0, rn(t0));
+    /* cannot optimize delay slot */
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
     /* delay slot */
     movr(r0, rn(t0));
+    flush();
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
     return (w);
@@ -2894,10 +3653,13 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, -i1);
        SLTU(rn(t1), r0, rn(t0));
        t1 = jit_get_reg(jit_class_gpr|jit_class_nospill);
        addiu(rn(t0), r0, -i1);
        SLTU(rn(t1), r0, rn(t0));
+       /* cannot optimize delay slot */
+       flush();
        w = _jit->pc.w;
        BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
        w = _jit->pc.w;
        BEQ(_ZERO_REGNO, rn(t1), ((i0 - w) >> 2) - 1);
        /* delay slot */
        movr(r0, rn(t0));
+       flush();
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
        jit_unget_reg(t1);
        jit_unget_reg(t0);
     }
@@ -2914,12 +3676,14 @@ static jit_word_t
 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
     AND(rn(t0), r0, r1);
     AND(rn(t0), r0, r1);
+    flush();
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
     jit_unget_reg(t0);
     return (w);
 }
@@ -2928,14 +3692,14 @@ static jit_word_t
 _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
 _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
-
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+    op = pending();
     andi(rn(t0), r0, i1);
     andi(rn(t0), r0, i1);
+    flush();
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BNE(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
     jit_unget_reg(t0);
     return (w);
 }
@@ -2944,12 +3708,14 @@ static jit_word_t
 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, r1);
+    op = pending();
     AND(rn(t0), r0, r1);
     AND(rn(t0), r0, r1);
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
     jit_unget_reg(t0);
     return (w);
 }
@@ -2958,14 +3724,14 @@ static jit_word_t
 _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
 _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     jit_word_t         w;
-    jit_int32_t                t0;
-    t0 = jit_get_reg(jit_class_gpr|jit_class_nospill);
-
+    jit_int32_t                op, t0;
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr, r0, _ZERO_REGNO);
+    op = pending();
     andi(rn(t0), r0, i1);
     andi(rn(t0), r0, i1);
+    flush();
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
     w = _jit->pc.w;
     BEQ(_ZERO_REGNO, rn(t0), ((i0 - w) >> 2) - 1);
-    NOP(1);
-
+    delay(op);
     jit_unget_reg(t0);
     return (w);
 }
     jit_unget_reg(t0);
     return (w);
 }
@@ -2973,78 +3739,112 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 static void
 _callr(jit_state_t *_jit, jit_int32_t r0)
 {
 static void
 _callr(jit_state_t *_jit, jit_int32_t r0)
 {
-    JALR(r0);
-    if (r0 != _T9_REGNO)
-       movr(_T9_REGNO, r0);
-    else
-       NOP(1);
+    jit_int32_t                op, t0;
+    if (r0 != _T9_REGNO) {
+       JALR(r0);
+       /* delay slot */
+        movr(_T9_REGNO, r0);
+       flush();
+    }
+    else {
+       /* make sure delay slot does not use r0 */
+       t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                       r0, _ZERO_REGNO);
+       op = pending();
+       JALR(r0);
+       delay(op);
+       if (t0 != JIT_NOREG)
+           jit_unget_reg(t0);
+    }
 }
 
 }
 
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t patch)
 {
 {
-    if (((_jit->pc.w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
-        if (can_sign_extend_short_p(i0)) {
-            JAL((i0 & ~0xf0000000) >> 2);
-            addiu(_T9_REGNO, _ZERO_REGNO, i0);
-            return;
-        }
-
-        if (can_zero_extend_short_p(i0)) {
-            JAL((i0 & ~0xf0000000) >> 2);
-            ORI(_T9_REGNO, _ZERO_REGNO, i0);
-            return;
+    jit_int32_t                op, t0;
+    jit_word_t         w, disp;
+    w = _jit->pc.w;
+    if (jit_mips2_p()) {
+       disp = ((i0 - w) >> 2) - 1;
+       if (patch || can_sign_extend_short_p(disp)) {
+           op = pending();
+           BGEZAL(_ZERO_REGNO, disp);  /* Renamed to BAL in mips release 6 */
+           delay(op);
+           goto done;
+       }
+    }
+    assert(!patch);
+    flush();
+    if (((w + sizeof(jit_int32_t)) & 0xf0000000) == (i0 & 0xf0000000)) {
+       if (can_sign_extend_short_p(i0)) {
+           JAL((i0 & ~0xf0000000) >> 2);
+           /* delay slot */
+           addiu(_T9_REGNO, _ZERO_REGNO, i0);
+       }
+       else if (can_zero_extend_short_p(i0)) {
+           JAL((i0 & ~0xf0000000) >> 2);
+           /* delay slot */
+           ORI(_T9_REGNO, _ZERO_REGNO, i0);
         }
         }
-
-        if (can_sign_extend_int_p(i0)) {
-            if (i0 & 0xffff) {
-                LUI(_T9_REGNO, i0 >> 16);
-                JAL((i0 & ~0xf0000000) >> 2);
-                ORI(_T9_REGNO, _T9_REGNO, i0);
-            } else {
-                JAL((i0 & ~0xf0000000) >> 2);
-                LUI(_T9_REGNO, i0 >> 16);
+       else if (can_sign_extend_int_p(i0)) {
+           if (i0 & 0xffff) {
+               LUI(_T9_REGNO, i0 >> 16);
+               JAL((i0 & ~0xf0000000) >> 2);
+               /* delay slot */
+               ORI(_T9_REGNO, _T9_REGNO, i0);
             }
             }
-            return;
+           else {
+               JAL((i0 & ~0xf0000000) >> 2);
+               /* delay slot */
+               LUI(_T9_REGNO, i0 >> 16);
+           }
         }
         }
+       else
+           goto fallback;
     }
     }
-
-    movi(_T9_REGNO, i0);
-    JALR(_T9_REGNO);
-    NOP(1);
+    else {
+    fallback:
+       /* make sure delay slot does not use _T9_REGNO */
+       t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                       _T9_REGNO, _ZERO_REGNO);
+       /* try to get an instruction before the call */
+       op = pending();
+       movi(_T9_REGNO, i0);
+       JALR(_T9_REGNO);
+       delay(op);
+       if (t0 != JIT_NOREG)
+           jit_unget_reg(t0);
+    }
+    done:
+    return (w);
 }
 
 static jit_word_t
 _calli_p(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         word;
 }
 
 static jit_word_t
 _calli_p(jit_state_t *_jit, jit_word_t i0)
 {
     jit_word_t         word;
-
+    jit_int32_t                op, t0;
+    /* make sure delay slot does not use _T9_REGNO */
+    t0 = jit_get_reg_for_delay_slot(jit_class_gpr|jit_class_chk,
+                                   _T9_REGNO, _ZERO_REGNO);
+    op = pending();
+    /* implicit flush() */
     word = _jit->pc.w;
     movi_p(_T9_REGNO, i0);
     JALR(_T9_REGNO);
     word = _jit->pc.w;
     movi_p(_T9_REGNO, i0);
     JALR(_T9_REGNO);
-    NOP(1);
-
+    delay(op);
+    if (t0 != JIT_NOREG)
+       jit_unget_reg(t0);
     return (word);
 }
 
     return (word);
 }
 
-static jit_int32_t fregs[] = {
-    _F30, _F28, _F26, _F24, _F22, _F20,
-#if !NEW_ABI
-    _F18, _F16,
-#endif
-};
-
-static jit_int32_t iregs[] = {
-    _S7, _S6, _S5, _S4, _S3, _S2, _S1, _S0,
-};
-
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                index;
-    jit_int32_t                offset;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -3063,51 +3863,65 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
 #endif
                              /* align stack at 8 bytes */
                              _jitc->function->self.aoff) + 7) & -8;
 #endif
-    /* callee save registers */
+
 #if NEW_ABI
 #if NEW_ABI
-    if ((_jitc->function->self.call & jit_call_varargs) &&
-       jit_arg_reg_p(_jitc->function->vagp))
-       subi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
-    else
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+#else
+    /* A frame is always needed because 16 bytes must always be allocated */
+    jit_check_frame();
 #endif
 #endif
-       subi(_SP_REGNO, _SP_REGNO, stack_framesize);
-    offset = stack_framesize - (sizeof(jit_word_t) << 1);
-    for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
-       if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
-           stxi_d(offset, _SP_REGNO, rn(fregs[index]));
-    }
-    for (index = 0; index < jit_size(iregs);
-        index++, offset -= sizeof(jit_word_t)) {
-       if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
-           stxi(offset, _SP_REGNO, rn(iregs[index]));
-    }
-    assert(offset >= sizeof(jit_word_t));
-    stxi(offset, _SP_REGNO, _RA_REGNO);
-    stxi(0, _SP_REGNO, _BP_REGNO);
-    movr(_BP_REGNO, _SP_REGNO);
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+    if (_jitc->function->need_frame) {
+       stxi(0, _SP_REGNO, _RA_REGNO);
+       stxi(STACK_SLOT, _SP_REGNO, _BP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _SP_REGNO, rn(iregs[reg]));
+           offs += STACK_SLOT;
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       movr(_BP_REGNO, _SP_REGNO);
 
     /* alloca */
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
 
     /* alloca */
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
-       index = jit_get_reg(jit_class_gpr);
-       movi(rn(index), _jitc->function->self.aoff);
-       stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(index));
-       jit_unget_reg(index);
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), _jitc->function->self.aoff);
+       stxi_i(_jitc->function->aoffoff, _BP_REGNO, rn(reg));
+       jit_unget_reg(reg);
     }
 
     if (_jitc->function->self.call & jit_call_varargs) {
     }
 
     if (_jitc->function->self.call & jit_call_varargs) {
+       for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg) {
+           offs = jit_framesize() - ((NUM_WORD_ARGS - reg) * STACK_SLOT);
 #if NEW_ABI
 #if NEW_ABI
-       index = _jitc->function->vagp;
+           SD(rn(_A0 - reg), offs, _BP_REGNO);
 #else
 #else
-       index = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
-#endif
-       offset = stack_framesize + index * STACK_SLOT;
-       for (; jit_arg_reg_p(index); ++index, offset += STACK_SLOT) {
-#if NEW_ABI
-           SD(rn(_A0 - index), offset, _BP_REGNO);
-#else
-           stxi(offset +  WORD_ADJUST, _BP_REGNO, rn(_A0 - index));
+           offs += 16 + WORD_ADJUST;
+           stxi(offs, _BP_REGNO, rn(_A0 - reg));
 #endif
        }
     }
 #endif
        }
     }
@@ -3116,48 +3930,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                index;
-    jit_int32_t                offset;
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->assume_frame)
        return;
+
+    if (_jitc->function->need_frame) {
+       movr(_SP_REGNO, _BP_REGNO);
+       ldxi(_RA_REGNO, _SP_REGNO, 0);
+       ldxi(_BP_REGNO, _SP_REGNO, STACK_SLOT);
+    }
+
     /* callee save registers */
     /* callee save registers */
-    movr(_SP_REGNO, _BP_REGNO);
-    offset = stack_framesize - (sizeof(jit_word_t) << 1);
-    for (index = 0; index < jit_size(fregs); index++, offset -= 8) {
-       if (jit_regset_tstbit(&_jitc->function->regset, fregs[index]))
-           ldxi_d(rn(fregs[index]), _SP_REGNO, offset);
-    }
-    for (index = 0; index < jit_size(iregs);
-        index++, offset -= sizeof(jit_word_t)) {
-       if (jit_regset_tstbit(&_jitc->function->regset, iregs[index]))
-           ldxi(rn(iregs[index]), _SP_REGNO, offset);
-    }
-    assert(offset >= sizeof(jit_word_t));
-    ldxi(_RA_REGNO, _SP_REGNO, offset);
-    ldxi(_BP_REGNO, _SP_REGNO, 0);
+    for (reg = 0, offs = STACK_SLOT << 1; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
     JR(_RA_REGNO);
     /* delay slot */
     JR(_RA_REGNO);
     /* delay slot */
-#if NEW_ABI
-    if ((_jitc->function->self.call & jit_call_varargs) &&
-       jit_arg_reg_p(_jitc->function->vagp))
-       addi(_SP_REGNO, _SP_REGNO, stack_framesize + 64);
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       addi(_SP_REGNO, _SP_REGNO, jit_framesize());
     else
     else
-#endif
-       addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+       NOP(1);
+    flush();
 }
 
 static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
     assert(_jitc->function->self.call & jit_call_varargs);
 }
 
 static void
 _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
     assert(_jitc->function->self.call & jit_call_varargs);
-    /* Initialize va_list to the first stack argument. */
 #if NEW_ABI
 #if NEW_ABI
+    /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
     if (jit_arg_reg_p(_jitc->function->vagp))
-       addi(r0, _BP_REGNO, stack_framesize + _jitc->function->vagp *
-            sizeof(jit_int64_t));
+       addi(r0, _BP_REGNO,
+            jit_framesize() -
+            ((NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT));
     else
 #endif
     else
 #endif
-       addi(r0, _BP_REGNO, _jitc->function->self.size);
+       addi(r0, _BP_REGNO, jit_selfsize());
 }
 
 static void
 }
 
 static void
@@ -3247,16 +4064,31 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            break;
 
        case MIPS_COP1:                 case MIPS_COP2:
            break;
 
        case MIPS_COP1:                 case MIPS_COP2:
-           assert(i.rs.b == MIPS_BC);
-           switch (i.rt.b) {
-               case MIPS_BCF:          case MIPS_BCFL:
-               case MIPS_BCT:          case MIPS_BCTL:
-                   i.is.b = ((label - instr) >> 2) - 1;
-                   u.i[0] = i.op;
-                   break;
-               default:
-                   assert(!"unhandled branch opcode");
-                   break;
+           if (jit_mips6_p()) {
+               switch (i.rs.b) {
+                   case MIPS_BC1EQZ:   case MIPS_BC1NEZ:
+                       assert(jit_mips6_p());
+                       i.is.b = ((label - instr) >> 2) - 1;
+                       u.i[0] = i.op;
+                       break;
+                   default:
+                       assert(!"unhandled branch opcode");
+                       break;
+               }
+           }
+           else {
+               assert(i.rs.b == MIPS_BC);
+               switch (i.rt.b) {
+                   case MIPS_BCF:              case MIPS_BCFL:
+                   case MIPS_BCT:              case MIPS_BCTL:
+                       assert(!jit_mips6_p());
+                       i.is.b = ((label - instr) >> 2) - 1;
+                       u.i[0] = i.op;
+                       break;
+                   default:
+                       assert(!"unhandled branch opcode");
+                       break;
+               }
            }
            break;
 
            }
            break;
 
index 6209fd6..8e3df86 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -27,6 +27,8 @@
 #  define MIPS_fmt_PS                  0x16            /* 2 x float32 */
 #  define MIPS_fmt_S_PU                        0x20
 #  define MIPS_fmt_S_PL                        0x26
 #  define MIPS_fmt_PS                  0x16            /* 2 x float32 */
 #  define MIPS_fmt_S_PU                        0x20
 #  define MIPS_fmt_S_PL                        0x26
+#  define MIPS_condn_S                 0x14            /* release 6 */
+#  define MIPS_condn_D                 0x15            /* release 6 */
 #  define MIPS_ADD_fmt                 0x00
 #  define MIPS_LWXC1                   0x00
 #  define MIPS_SUB_fmt                 0x01
 #  define MIPS_ADD_fmt                 0x00
 #  define MIPS_LWXC1                   0x00
 #  define MIPS_SUB_fmt                 0x01
 #  define MIPS_cond_NGE                        0x3d
 #  define MIPS_cond_LE                 0x3e
 #  define MIPS_cond_UGT                        0x3f
 #  define MIPS_cond_NGE                        0x3d
 #  define MIPS_cond_LE                 0x3e
 #  define MIPS_cond_UGT                        0x3f
+/* Mips release 6 */
+#  define MIPS_cmp_AF                  0x00
+#  define MIPS_cmp_UN                  0x01
+#  define MIPS_cmp_EQ                  0x02
+#  define MIPS_cmp_UEQ                 0x03
+#  define MIPS_cmp_LT                  0x04
+#  define MIPS_cmp_ULT                 0x05
+#  define MIPS_cmp_LE                  0x06
+#  define MIPS_cmp_ULE                 0x07
+#  define MIPS_cmp_SAF                 0x08
+#  define MIPS_cmp_SUN                 0x09
+#  define MIPS_cmp_SEQ                 0x0a
+#  define MIPS_cmp_SUEQ                        0x0b
+#  define MIPS_cmp_SLT                 0x0c
+#  define MIPS_cmp_SULT                        0x0d
+#  define MIPS_cmp_SLE                 0x0e
+#  define MIPS_cmp_SULE                        0x0f
 #  define ADD_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt)
 #  define ADD_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt)
 #  define SUB_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt)
 #  define ADD_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_ADD_fmt)
 #  define ADD_D(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_D,ft,fs,fd,MIPS_ADD_fmt)
 #  define SUB_S(fd,fs,ft)              hrrrit(MIPS_COP1,MIPS_fmt_S,ft,fs,fd,MIPS_SUB_fmt)
 #  define SQRT_S(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt)
 #  define SQRT_D(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt)
 #  define MFC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0)
 #  define SQRT_S(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_SQRT_fmt)
 #  define SQRT_D(fd,fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_SQRT_fmt)
 #  define MFC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MF,rt,fs,0,0)
+#  define MFHC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_MFH,rt,fs,0,0)
 #  define MTC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0)
 #  define MTC1(rt, fs)                 hrrrit(MIPS_COP1,MIPS_MT,rt,fs,0,0)
+#  define MTHC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_MTH,rt,fs,0,0)
 #  define DMFC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0)
 #  define DMTC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0)
 #  define CVT_D_S(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D)
 #  define DMFC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMF,rt,fs,0,0)
 #  define DMTC1(rt, fs)                        hrrrit(MIPS_COP1,MIPS_DMT,rt,fs,0,0)
 #  define CVT_D_S(fd,fs)               hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_CVT_fmt_D)
 #  define MOV_S(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt)
 #  define MOV_D(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt)
 #  define BC1F(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im)
 #  define MOV_S(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_S,0,fs,fd,MIPS_MOV_fmt)
 #  define MOV_D(fd, fs)                        hrrrit(MIPS_COP1,MIPS_fmt_D,0,fs,fd,MIPS_MOV_fmt)
 #  define BC1F(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCF,im)
+#  define BC1EQZ(ft,im)                        hrri(MIPS_COP1,MIPS_BC1EQZ,ft,im)
 #  define BC1T(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im)
 #  define BC1T(im)                     hrri(MIPS_COP1,MIPS_BC,MIPS_BCT,im)
+#  define BC1NEZ(ft,im)                        hrri(MIPS_COP1,MIPS_BC1NEZ,ft,im)
 #  define C_F_S(fs,ft)                 c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F)
 #  define C_F_D(fs,ft)                 c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F)
 #  define C_F_PS(fs,ft)                        c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F)
 #  define C_F_S(fs,ft)                 c_cond_fmt(MIPS_fmt_S,ft,fs,MIPS_cond_F)
 #  define C_F_D(fs,ft)                 c_cond_fmt(MIPS_fmt_D,ft,fs,MIPS_cond_F)
 #  define C_F_PS(fs,ft)                        c_cond_fmt(MIPS_fmt_PS,ft,fs,MIPS_cond_F)
 static void
 _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
            jit_int32_t ft, jit_int32_t fs, jit_int32_t cc);
 static void
 _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
            jit_int32_t ft, jit_int32_t fs, jit_int32_t cc);
+#  define CMP_AF_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_AF)
+#  define CMP_AF_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_AF)
+#  define CMP_UN_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UN)
+#  define CMP_UN_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UN)
+#  define CMP_EQ_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_EQ)
+#  define CMP_EQ_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_EQ)
+#  define CMP_UEQ_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_UEQ)
+#  define CMP_UEQ_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_UEQ)
+#  define CMP_LT_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LT)
+#  define CMP_LT_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LT)
+#  define CMP_ULT_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULT)
+#  define CMP_ULT_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULT)
+#  define CMP_LE_S(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_LE)
+#  define CMP_LE_D(fd,fs,ft)           cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_LE)
+#  define CMP_ULE_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_ULE)
+#  define CMP_ULE_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_ULE)
+#  define CMP_SAF_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SAF)
+#  define CMP_SAF_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SAF)
+#  define CMP_SUN_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUN)
+#  define CMP_SUN_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUN)
+#  define CMP_SEQ_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SEQ)
+#  define CMP_SEQ_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SEQ)
+#  define CMP_SUEQ_S(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SUEQ)
+#  define CMP_SUEQ_D(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SUEQ)
+#  define CMP_SLT_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLT)
+#  define CMP_SLT_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLT)
+#  define CMP_SULT_S(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULT)
+#  define CMP_SULT_D(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULT)
+#  define CMP_SLE_S(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SLE)
+#  define CMP_SLE_D(fd,fs,ft)          cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SLE)
+#  define CMP_SULE_S(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_S,fd,ft,fs,MIPS_cmp_SULE)
+#  define CMP_SULE_D(fd,fs,ft)         cmp_cond_fmt(MIPS_condn_D,fd,ft,fs,MIPS_cmp_SULE)
+#  define cmp_cond_fmt(fm,fd,ft,fs,cn) _cmp_cond_fmt(_jit,fm,fd,ft,fs,cn)
+static void
+_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd,
+             jit_int32_t ft, jit_int32_t fs, jit_int32_t cn);
 #  define addr_f(r0,r1,r2)             ADD_S(r0,r1,r2)
 #  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
 static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
 #  define addr_f(r0,r1,r2)             ADD_S(r0,r1,r2)
 #  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
 static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
@@ -220,7 +279,7 @@ static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
 #  define sqrtr_f(r0,r1)               SQRT_S(r0,r1)
 #  define sqrtr_d(r0,r1)               SQRT_D(r0,r1)
 #  define movr_w_f(r0, r1)             MTC1(r1, r0)
 #  define sqrtr_f(r0,r1)               SQRT_S(r0,r1)
 #  define sqrtr_d(r0,r1)               SQRT_D(r0,r1)
 #  define movr_w_f(r0, r1)             MTC1(r1, r0)
-#  define movr_f_w(r0, r1)             MFC1(r1, r0)
+#  define movr_f_w(r0, r1)             MFC1(r0, r1)
 #  define movi_f_w(r0, i0)             _movi_f_w(_jit, r0, i0)
 static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*);
 #  define extr_f(r0, r1)               _extr_f(_jit, r0, r1)
 #  define movi_f_w(r0, i0)             _movi_f_w(_jit, r0, i0)
 static void _movi_f_w(jit_state_t*,jit_int32_t,jit_float32_t*);
 #  define extr_f(r0, r1)               _extr_f(_jit, r0, r1)
@@ -565,7 +624,22 @@ _c_cond_fmt(jit_state_t *_jit, jit_int32_t fm,
     i.ft.b = ft;
     i.fm.b = fm;
     i.hc.b = MIPS_COP1;
     i.ft.b = ft;
     i.fm.b = fm;
     i.hc.b = MIPS_COP1;
-    ii(i.op);
+    instr(i.op);
+}
+
+static void
+_cmp_cond_fmt(jit_state_t *_jit, jit_int32_t fm, jit_int32_t fd,
+             jit_int32_t ft, jit_int32_t fs, jit_int32_t cn)
+{
+    jit_instr_t                i;
+    i.op = 0;          /* must have bit 6 zero ed */
+    i.cn.b = cn;
+    i.ft.b = ft;
+    i.fs.b = fs;
+    i.fd.b = fd;
+    i.fm.b = fm;
+    i.hc.b = MIPS_COP1;
+    instr(i.op);
 }
 
 #  define fpr_opi(name, type, size)                                    \
 }
 
 #  define fpr_opi(name, type, size)                                    \
@@ -829,16 +903,28 @@ static void
 _movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     assert(r1 == r2 - 1);
 _movr_ww_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     assert(r1 == r2 - 1);
-    MTC1(r1, r0 + BE_P);
-    MTC1(r2, r0 + LE_P);
+    if (jit_mips6_p()) {
+       MTC1(r1, r0);
+       MTHC1(r2, r0);
+    }
+    else {
+       MTC1(r1, r0 + BE_P);
+       MTC1(r2, r0 + LE_P);
+    }
 }
 
 static void
 _movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     assert(r0 == r1 - 1);
 }
 
 static void
 _movr_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     assert(r0 == r1 - 1);
-    MFC1(r0, r2 + BE_P);
-    MFC1(r1, r2 + LE_P);
+    if (jit_mips6_p()) {
+       MFC1(r0, r2);
+       MFHC1(r1, r2);
+    }
+    else {
+       MFC1(r0, r2 + BE_P);
+       MFC1(r1, r2 + LE_P);
+    }
 }
 
 static void
 }
 
 static void
@@ -896,40 +982,40 @@ _truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-#  if __WORDSIZE == 64 || NEW_ABI
-    LDC1(r0, 0, r1);
-#  else
-    LWC1(r0 + BE_P, 0, r1);
-    LWC1(r0 + LE_P, 4, r1);
-#  endif
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI)
+       LDC1(r0, 0, r1);
+    else {
+       LWC1(r0 + BE_P, 0, r1);
+       LWC1(r0 + LE_P, 4, r1);
+    }
 }
 
 static void
 _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 }
 
 static void
 _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       LDC1(r0, i0, _ZERO_REGNO);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LDC1(r0, 0, rn(reg));
-       jit_unget_reg(reg);
-    }
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       LWC1(r0 + BE_P, i0, _ZERO_REGNO);
-       LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           LDC1(r0, i0, _ZERO_REGNO);
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           LDC1(r0, 0, rn(reg));
+           jit_unget_reg(reg);
+       }
     }
     else {
     }
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LWC1(r0 + BE_P, 0, rn(reg));
-       LWC1(r0 + LE_P, 4, rn(reg));
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           LWC1(r0 + BE_P, i0, _ZERO_REGNO);
+           LWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           ldr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
     }
     }
-#  endif
 }
 
 static void
 }
 
 static void
@@ -946,52 +1032,60 @@ static void
 _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       LDC1(r0, i0, r1);
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       LWC1(r0 + BE_P, i0, r1);
-       LWC1(r0 + LE_P, i0 + 4, r1);
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           LDC1(r0, i0, r1);
+       else
+           goto fallback;
     }
     }
-#  endif
     else {
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_d(r0, rn(reg));
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           LWC1(r0 + BE_P, i0, r1);
+           LWC1(r0 + LE_P, i0 + 4, r1);
+       }
+       else {
+       fallback:
+           reg = jit_get_reg(jit_class_gpr);
+           addi(rn(reg), r1, i0);
+           ldr_d(r0, rn(reg));
+           jit_unget_reg(reg);
+       }
     }
 }
 
 static void
 _str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1)
 {
     }
 }
 
 static void
 _str_d(jit_state_t *_jit,jit_int32_t r0, jit_int32_t r1)
 {
-#  if __WORDSIZE == 64 || NEW_ABI
-    SDC1(r1, 0, r0);
-#  else
-    SWC1(r1 + BE_P, 0, r0);
-    SWC1(r1 + LE_P, 4, r0);
-#  endif
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI)
+       SDC1(r1, 0, r0);
+    else {
+       SWC1(r1 + BE_P, 0, r0);
+       SWC1(r1 + LE_P, 4, r0);
+    }
 }
 
 static void
 _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 }
 
 static void
 _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       SDC1(r0, i0, _ZERO_REGNO);
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       SWC1(r0 + BE_P, i0, _ZERO_REGNO);
-       SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+    if (jit_mips6_p() ||  __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           SDC1(r0, i0, _ZERO_REGNO);
+       else
+           goto fallback;
     }
     }
-#  endif
     else {
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       str_d(rn(reg), r0);
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           SWC1(r0 + BE_P, i0, _ZERO_REGNO);
+           SWC1(r0 + LE_P, i0 + 4, _ZERO_REGNO);
+       }
+       else {
+       fallback:
+           reg = jit_get_reg(jit_class_gpr);
+           movi(rn(reg), i0);
+           str_d(rn(reg), r0);
+           jit_unget_reg(reg);
+       }
     }
 }
 
     }
 }
 
@@ -1009,20 +1103,24 @@ static void
 _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
 _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
-#  if __WORDSIZE == 64 || NEW_ABI
-    if (can_sign_extend_short_p(i0))
-       SDC1(r1, i0, r0);
-#  else
-    if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
-       SWC1(r1 + BE_P, i0, r0);
-       SWC1(r1 + LE_P, i0 + 4, r0);
+    if (jit_mips6_p() || __WORDSIZE == 64 || NEW_ABI) {
+       if (can_sign_extend_short_p(i0))
+           SDC1(r1, i0, r0);
+       else
+           goto fallback;
     }
     }
-#  endif
     else {
     else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r0, i0);
-       str_d(rn(reg), r1);
-       jit_unget_reg(reg);
+       if (can_sign_extend_short_p(i0) && can_sign_extend_short_p(i0 + 4)) {
+           SWC1(r1 + BE_P, i0, r0);
+           SWC1(r1 + LE_P, i0 + 4, r0);
+       }
+       else {
+       fallback:
+           reg = jit_get_reg(jit_class_gpr);
+           addi(rn(reg), r0, i0);
+           str_d(rn(reg), r1);
+           jit_unget_reg(reg);
+       }
     }
 }
 
     }
 }
 
@@ -1058,30 +1156,49 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
     else
        DMTC1(_ZERO_REGNO, r0);
 #  else
     else
        DMTC1(_ZERO_REGNO, r0);
 #  else
-    if (_jitc->no_data)
-       reg = jit_get_reg(jit_class_gpr);
-    if (data.i[0]) {
+    if (jit_mips6_p()) {
        if (_jitc->no_data) {
        if (_jitc->no_data) {
-           movi(rn(reg), data.i[0]);
-           MTC1(rn(reg), r0 + BE_P);
+           reg = jit_get_reg(jit_class_gpr);
+#  if __WORDSIZE == 64
+           movi(rn(reg), data.l);
+           DMTC1(rn(reg), r0);
+#  else
+           movi(rn(reg), data.i[0 + BE_P]);
+           MTC1(rn(reg), r0);
+           movi(rn(reg), data.i[0 + LE_P]);
+           MTHC1(rn(reg), r0);
+#  endif
+           jit_unget_reg(reg);
        }
        else
        }
        else
-           ldi_f(r0 + BE_P, (jit_word_t)i0);
+           ldi_d(r0, (jit_word_t)i0);
     }
     }
-    else
-       MTC1(_ZERO_REGNO, r0 + BE_P);
-    if (data.i[1]) {
-       if (_jitc->no_data) {
-           movi(rn(reg), data.i[1]);
-           MTC1(rn(reg), r0 + LE_P);
+    else {
+       if (_jitc->no_data)
+           reg = jit_get_reg(jit_class_gpr);
+       if (data.i[0]) {
+           if (_jitc->no_data) {
+               movi(rn(reg), data.i[0]);
+               MTC1(rn(reg), r0 + BE_P);
+           }
+           else
+               ldi_f(r0 + BE_P, (jit_word_t)i0);
        }
        else
        }
        else
-           ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+           MTC1(_ZERO_REGNO, r0 + BE_P);
+       if (data.i[1]) {
+           if (_jitc->no_data) {
+               movi(rn(reg), data.i[1]);
+               MTC1(rn(reg), r0 + LE_P);
+           }
+           else
+               ldi_f(r0 + LE_P, ((jit_word_t)i0) + 4);
+       }
+       else
+           MTC1(_ZERO_REGNO, r0 + LE_P);
+       if (_jitc->no_data)
+           jit_unget_reg(reg);
     }
     }
-    else
-       MTC1(_ZERO_REGNO, r0 + LE_P);
-    if (_jitc->no_data)
-       jit_unget_reg(reg);
 #  endif
 }
 
 #  endif
 }
 
@@ -1089,13 +1206,26 @@ static void
 _ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(lt)
 
 }
 fopi(lt)
 
@@ -1103,13 +1233,26 @@ static void
 _ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(le)
 
 }
 fopi(le)
 
@@ -1117,13 +1260,26 @@ static void
 _eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_EQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(eq)
 
 }
 fopi(eq)
 
@@ -1131,13 +1287,26 @@ static void
 _ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ge)
 
 }
 fopi(ge)
 
@@ -1145,13 +1314,26 @@ static void
 _gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _gtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(gt)
 
 }
 fopi(gt)
 
@@ -1159,13 +1341,26 @@ static void
 _ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_EQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ne)
 
 }
 fopi(ne)
 
@@ -1173,13 +1368,26 @@ static void
 _unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _unltr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unlt)
 
 }
 fopi(unlt)
 
@@ -1187,13 +1395,26 @@ static void
 _unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _unler_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unle)
 
 }
 fopi(unle)
 
@@ -1201,13 +1422,26 @@ static void
 _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(uneq)
 
 }
 fopi(uneq)
 
@@ -1215,13 +1449,26 @@ static void
 _unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _unger_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLT_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unge)
 
 }
 fopi(unge)
 
@@ -1229,13 +1476,26 @@ static void
 _ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ungtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLE_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ungt)
 
 }
 fopi(ungt)
 
@@ -1243,13 +1503,26 @@ static void
 _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ltgt)
 
 }
 fopi(ltgt)
 
@@ -1257,13 +1530,26 @@ static void
 _ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _ordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UN_S(r1, r2);
+       flush();
+       /* cannot optimize delay slot */
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(ord)
 
@@ -1271,13 +1557,26 @@ static void
 _unordr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_S(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UN_S(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 fopi(unord)
 
@@ -1285,10 +1584,25 @@ static jit_word_t
 _bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(lt)
@@ -1297,10 +1611,25 @@ static jit_word_t
 _bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(le)
@@ -1309,10 +1638,25 @@ static jit_word_t
 _beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_EQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(eq)
@@ -1321,10 +1665,25 @@ static jit_word_t
 _bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ge)
@@ -1333,10 +1692,25 @@ static jit_word_t
 _bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(gt)
@@ -1345,10 +1719,25 @@ static jit_word_t
 _bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_EQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ne)
@@ -1357,10 +1746,25 @@ static jit_word_t
 _bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unlt)
@@ -1369,10 +1773,25 @@ static jit_word_t
 _bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unle)
@@ -1381,10 +1800,25 @@ static jit_word_t
 _buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UEQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(uneq)
@@ -1393,10 +1827,25 @@ static jit_word_t
 _bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLT_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unge)
@@ -1405,10 +1854,25 @@ static jit_word_t
 _bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLE_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ungt)
@@ -1417,10 +1881,25 @@ static jit_word_t
 _bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UEQ_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ltgt)
@@ -1429,10 +1908,25 @@ static jit_word_t
 _bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UN_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(ord)
@@ -1441,10 +1935,25 @@ static jit_word_t
 _bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_S(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_S(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UN_S(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 fbopi(unord)
@@ -1453,13 +1962,26 @@ static void
 _ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(lt)
 
@@ -1467,13 +1989,26 @@ static void
 _ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_OLE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(le)
 
@@ -1481,13 +2016,26 @@ static void
 _eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_EQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(eq)
 
@@ -1495,13 +2043,26 @@ static void
 _ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ge)
 
@@ -1509,13 +2070,26 @@ static void
 _gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_ULE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(gt)
 
@@ -1523,13 +2097,26 @@ static void
 _ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_EQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_EQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ne)
 
@@ -1537,13 +2124,26 @@ static void
 _unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unlt)
 
@@ -1551,13 +2151,26 @@ static void
 _unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_ULE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_ULE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unle)
 
@@ -1565,13 +2178,26 @@ static void
 _uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(uneq)
 
@@ -1579,13 +2205,26 @@ static void
 _unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LT_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLT_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unge)
 
@@ -1593,13 +2232,26 @@ static void
 _ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_LE_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_OLE_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ungt)
 
@@ -1607,13 +2259,26 @@ static void
 _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UEQ_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UEQ_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ltgt)
 
@@ -1621,13 +2286,26 @@ static void
 _ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       addi(r0, r0, 1);
+    }
+    else {
+       C_UN_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1F(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(ord)
 
@@ -1635,13 +2313,26 @@ static void
 _unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(0);
-    /* delay slot */
-    movi(r0, 1);
-    movi(r0, 0);
-    patch_at(w, _jit->pc.w);
+    jit_int32_t                reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg(jit_class_fpr);
+       CMP_UN_D(rn(reg), r1, r2);
+       MFC1(r0, rn(reg));
+       jit_unget_reg(reg);
+       andi(r0, r0, 1);
+    }
+    else {
+       C_UN_D(r1, r2);
+       /* cannot optimize delay slot */
+       flush();
+       w = _jit->pc.w;
+       BC1T(0);
+       /* delay slot */
+       movi(r0, 1);
+       movi(r0, 0);
+       flush();
+       patch_at(w, _jit->pc.w);
+    }
 }
 dopi(unord)
 
@@ -1649,10 +2340,25 @@ static jit_word_t
 _bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(lt)
@@ -1661,10 +2367,25 @@ static jit_word_t
 _bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(le)
@@ -1673,10 +2394,25 @@ static jit_word_t
 _beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_EQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(eq)
@@ -1685,10 +2421,25 @@ static jit_word_t
 _bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ge)
@@ -1697,10 +2448,25 @@ static jit_word_t
 _bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(gt)
@@ -1709,10 +2475,25 @@ static jit_word_t
 _bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_EQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_EQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_EQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ne)
     return (w);
 }
 dbopi(ne)
@@ -1721,10 +2502,25 @@ static jit_word_t
 _bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unlt)
     return (w);
 }
 dbopi(unlt)
@@ -1733,10 +2529,25 @@ static jit_word_t
 _bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_ULE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_ULE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_ULE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unle)
     return (w);
 }
 dbopi(unle)
@@ -1745,10 +2556,25 @@ static jit_word_t
 _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UEQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(uneq)
     return (w);
 }
 dbopi(uneq)
@@ -1757,10 +2583,25 @@ static jit_word_t
 _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLT_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LT_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLT_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unge)
     return (w);
 }
 dbopi(unge)
@@ -1769,10 +2610,25 @@ static jit_word_t
 _bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_OLE_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_LE_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_OLE_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ungt)
     return (w);
 }
 dbopi(ungt)
@@ -1781,10 +2637,25 @@ static jit_word_t
 _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UEQ_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UEQ_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UEQ_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ltgt)
     return (w);
 }
 dbopi(ltgt)
@@ -1793,10 +2664,25 @@ static jit_word_t
 _bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1F(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1EQZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UN_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1F(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(ord)
     return (w);
 }
 dbopi(ord)
@@ -1805,10 +2691,25 @@ static jit_word_t
 _bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
 _bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         w;
-    C_UN_D(r1, r2);
-    w = _jit->pc.w;
-    BC1T(((i0 - w) >> 2) - 1);
-    NOP(1);
+    jit_int32_t                op, reg;
+    if (jit_mips6_p()) {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr, r1, r2);
+       op = pending();
+       CMP_UN_D(rn(reg), r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1NEZ(rn(reg), ((i0 - w) >> 2) - 1);
+    }
+    else {
+       reg = jit_get_reg_for_delay_slot(jit_class_fpr|jit_class_chk, r1, r2);
+       op = pending();
+       C_UN_D(r1, r2);
+       flush();
+       w = _jit->pc.w;
+       BC1T(((i0 - w) >> 2) - 1);
+    }
+    delay(op);
+    jit_unget_reg(reg);
     return (w);
 }
 dbopi(unord)
     return (w);
 }
 dbopi(unord)
index 91deb4b..0a7436d 100644 (file)
 
 #if __WORDSIZE == 32
 
 #if __WORDSIZE == 32
-#if NEW_ABI
-#define JIT_INSTR_MAX 52
-    0, /* data */
-    0, /* live */
-    0, /* align */
-    0, /* save */
-    0, /* load */
-    0, /* #name */
-    0, /* #note */
-    0, /* label */
-    44,        /* prolog */
-    0, /* ellipsis */
-    0, /* va_push */
-    0, /* allocai */
-    0, /* allocar */
-    0, /* arg */
-    0, /* getarg_c */
-    0, /* getarg_uc */
-    0, /* getarg_s */
-    0, /* getarg_us */
-    0, /* getarg_i */
-    0, /* getarg_ui */
-    0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    0, /* va_start */
-    0, /* va_arg */
-    0, /* va_arg_d */
-    0, /* va_end */
-    4, /* addr */
-    12,        /* addi */
-    12,        /* addcr */
-    20,        /* addci */
-    28,        /* addxr */
-    28,        /* addxi */
-    4, /* subr */
-    12,        /* subi */
-    12,        /* subcr */
-    20,        /* subci */
-    28,        /* subxr */
-    28,        /* subxi */
-    16,        /* rsbi */
-    4, /* mulr */
-    12,        /* muli */
-    12,        /* qmulr */
-    20,        /* qmuli */
-    12,        /* qmulr_u */
-    20,        /* qmuli_u */
-    8, /* divr */
-    16,        /* divi */
-    8, /* divr_u */
-    16,        /* divi_u */
-    12,        /* qdivr */
-    16,        /* qdivi */
-    12,        /* qdivr_u */
-    16,        /* qdivi_u */
-    8, /* remr */
-    16,        /* remi */
-    8, /* remr_u */
-    16,        /* remi_u */
-    4, /* andr */
-    12,        /* andi */
-    4, /* orr */
-    12,        /* ori */
-    4, /* xorr */
-    12,        /* xori */
-    4, /* lshr */
-    4, /* lshi */
-    4, /* rshr */
-    4, /* rshi */
-    4, /* rshr_u */
-    4, /* rshi_u */
-    4, /* negr */
-    8, /* comr */
-    4, /* ltr */
-    4, /* lti */
-    4, /* ltr_u */
-    4, /* lti_u */
-    8, /* ler */
-    12,        /* lei */
-    8, /* ler_u */
-    12,        /* lei_u */
-    12,        /* eqr */
-    12,        /* eqi */
-    8, /* ger */
-    12,        /* gei */
-    8, /* ger_u */
-    12,        /* gei_u */
-    4, /* gtr */
-    8, /* gti */
-    4, /* gtr_u */
-    8, /* gti_u */
-    8, /* ner */
-    8, /* nei */
-    4, /* movr */
-    8, /* movi */
-    4, /* movnr */
-    4, /* movzr */
-    8, /* extr_c */
-    4, /* extr_uc */
-    8, /* extr_s */
-    4, /* extr_us */
-    0, /* extr_i */
-    0, /* extr_ui */
-    4, /* htonr_us */
-    4, /* htonr_ui */
-    0, /* htonr_ul */
-    4, /* ldr_c */
-    12,        /* ldi_c */
-    4, /* ldr_uc */
-    12,        /* ldi_uc */
-    4, /* ldr_s */
-    12,        /* ldi_s */
-    4, /* ldr_us */
-    12,        /* ldi_us */
-    4, /* ldr_i */
-    12,        /* ldi_i */
-    0, /* ldr_ui */
-    0, /* ldi_ui */
-    0, /* ldr_l */
-    0, /* ldi_l */
-    8, /* ldxr_c */
-    4, /* ldxi_c */
-    8, /* ldxr_uc */
-    4, /* ldxi_uc */
-    8, /* ldxr_s */
-    4, /* ldxi_s */
-    8, /* ldxr_us */
-    4, /* ldxi_us */
-    8, /* ldxr_i */
-    4, /* ldxi_i */
-    0, /* ldxr_ui */
-    0, /* ldxi_ui */
-    0, /* ldxr_l */
-    0, /* ldxi_l */
-    4, /* str_c */
-    12,        /* sti_c */
-    4, /* str_s */
-    12,        /* sti_s */
-    4, /* str_i */
-    12,        /* sti_i */
-    0, /* str_l */
-    0, /* sti_l */
-    8, /* stxr_c */
-    4, /* stxi_c */
-    8, /* stxr_s */
-    4, /* stxi_s */
-    8, /* stxr_i */
-    4, /* stxi_i */
-    0, /* stxr_l */
-    0, /* stxi_l */
-    12,        /* bltr */
-    12,        /* blti */
-    12,        /* bltr_u */
-    12,        /* blti_u */
-    12,        /* bler */
-    16,        /* blei */
-    12,        /* bler_u */
-    16,        /* blei_u */
-    8, /* beqr */
-    16,        /* beqi */
-    12,        /* bger */
-    12,        /* bgei */
-    12,        /* bger_u */
-    12,        /* bgei_u */
-    12,        /* bgtr */
-    16,        /* bgti */
-    12,        /* bgtr_u */
-    16,        /* bgti_u */
-    8, /* bner */
-    16,        /* bnei */
-    12,        /* bmsr */
-    12,        /* bmsi */
-    12,        /* bmcr */
-    12,        /* bmci */
-    28,        /* boaddr */
-    28,        /* boaddi */
-    16,        /* boaddr_u */
-    20,        /* boaddi_u */
-    28,        /* bxaddr */
-    28,        /* bxaddi */
-    16,        /* bxaddr_u */
-    20,        /* bxaddi_u */
-    28,        /* bosubr */
-    28,        /* bosubi */
-    16,        /* bosubr_u */
-    20,        /* bosubi_u */
-    28,        /* bxsubr */
-    28,        /* bxsubi */
-    16,        /* bxsubr_u */
-    20,        /* bxsubi_u */
-    0, /* jmpr */
-    8, /* jmpi */
-    12,        /* callr */
-    16,        /* calli */
-    0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
-    0, /* finishr */
-    0, /* finishi */
-    0, /* ret */
-    0, /* retr */
-    0, /* reti */
-    0, /* retval_c */
-    0, /* retval_uc */
-    0, /* retval_s */
-    0, /* retval_us */
-    0, /* retval_i */
-    0, /* retval_ui */
-    0, /* retval_l */
-    44,        /* epilog */
-    0, /* arg_f */
-    0, /* getarg_f */
-    0, /* putargr_f */
-    0, /* putargi_f */
-    4, /* addr_f */
-    16,        /* addi_f */
-    4, /* subr_f */
-    16,        /* subi_f */
-    16,        /* rsbi_f */
-    4, /* mulr_f */
-    16,        /* muli_f */
-    4, /* divr_f */
-    16,        /* divi_f */
-    4, /* negr_f */
-    4, /* absr_f */
-    4, /* sqrtr_f */
-    16,        /* ltr_f */
-    28,        /* lti_f */
-    16,        /* ler_f */
-    28,        /* lei_f */
-    16,        /* eqr_f */
-    28,        /* eqi_f */
-    16,        /* ger_f */
-    28,        /* gei_f */
-    16,        /* gtr_f */
-    28,        /* gti_f */
-    16,        /* ner_f */
-    28,        /* nei_f */
-    16,        /* unltr_f */
-    28,        /* unlti_f */
-    16,        /* unler_f */
-    28,        /* unlei_f */
-    16,        /* uneqr_f */
-    28,        /* uneqi_f */
-    16,        /* unger_f */
-    28,        /* ungei_f */
-    16,        /* ungtr_f */
-    28,        /* ungti_f */
-    16,        /* ltgtr_f */
-    28,        /* ltgti_f */
-    16,        /* ordr_f */
-    28,        /* ordi_f */
-    16,        /* unordr_f */
-    28,        /* unordi_f */
-    8, /* truncr_f_i */
-    0, /* truncr_f_l */
-    8, /* extr_f */
-    4, /* extr_d_f */
-    4, /* movr_f */
-    12,        /* movi_f */
-    4, /* ldr_f */
-    12,        /* ldi_f */
-    8, /* ldxr_f */
-    4, /* ldxi_f */
-    4, /* str_f */
-    12,        /* sti_f */
-    8, /* stxr_f */
-    4, /* stxi_f */
-    12,        /* bltr_f */
-    24,        /* blti_f */
-    12,        /* bler_f */
-    24,        /* blei_f */
-    12,        /* beqr_f */
-    24,        /* beqi_f */
-    12,        /* bger_f */
-    24,        /* bgei_f */
-    12,        /* bgtr_f */
-    24,        /* bgti_f */
-    12,        /* bner_f */
-    24,        /* bnei_f */
-    12,        /* bunltr_f */
-    24,        /* bunlti_f */
-    12,        /* bunler_f */
-    24,        /* bunlei_f */
-    12,        /* buneqr_f */
-    24,        /* buneqi_f */
-    12,        /* bunger_f */
-    24,        /* bungei_f */
-    12,        /* bungtr_f */
-    24,        /* bungti_f */
-    12,        /* bltgtr_f */
-    24,        /* bltgti_f */
-    12,        /* bordr_f */
-    24,        /* bordi_f */
-    12,        /* bunordr_f */
-    24,        /* bunordi_f */
-    0, /* pushargr_f */
-    0, /* pushargi_f */
-    0, /* retr_f */
-    0, /* reti_f */
-    0, /* retval_f */
-    0, /* arg_d */
-    0, /* getarg_d */
-    0, /* putargr_d */
-    0, /* putargi_d */
-    4, /* addr_d */
-    16,        /* addi_d */
-    4, /* subr_d */
-    16,        /* subi_d */
-    16,        /* rsbi_d */
-    4, /* mulr_d */
-    16,        /* muli_d */
-    4, /* divr_d */
-    16,        /* divi_d */
-    4, /* negr_d */
-    4, /* absr_d */
-    4, /* sqrtr_d */
-    16,        /* ltr_d */
-    28,        /* lti_d */
-    16,        /* ler_d */
-    28,        /* lei_d */
-    16,        /* eqr_d */
-    28,        /* eqi_d */
-    16,        /* ger_d */
-    28,        /* gei_d */
-    16,        /* gtr_d */
-    28,        /* gti_d */
-    16,        /* ner_d */
-    28,        /* nei_d */
-    16,        /* unltr_d */
-    28,        /* unlti_d */
-    16,        /* unler_d */
-    28,        /* unlei_d */
-    16,        /* uneqr_d */
-    28,        /* uneqi_d */
-    16,        /* unger_d */
-    28,        /* ungei_d */
-    16,        /* ungtr_d */
-    28,        /* ungti_d */
-    16,        /* ltgtr_d */
-    28,        /* ltgti_d */
-    16,        /* ordr_d */
-    28,        /* ordi_d */
-    16,        /* unordr_d */
-    28,        /* unordi_d */
-    8, /* truncr_d_i */
-    0, /* truncr_d_l */
-    8, /* extr_d */
-    4, /* extr_f_d */
-    4, /* movr_d */
-    12,        /* movi_d */
-    4, /* ldr_d */
-    12,        /* ldi_d */
-    8, /* ldxr_d */
-    4, /* ldxi_d */
-    4, /* str_d */
-    12,        /* sti_d */
-    8, /* stxr_d */
-    4, /* stxi_d */
-    12,        /* bltr_d */
-    24,        /* blti_d */
-    12,        /* bler_d */
-    24,        /* blei_d */
-    12,        /* beqr_d */
-    24,        /* beqi_d */
-    12,        /* bger_d */
-    24,        /* bgei_d */
-    12,        /* bgtr_d */
-    24,        /* bgti_d */
-    12,        /* bner_d */
-    24,        /* bnei_d */
-    12,        /* bunltr_d */
-    24,        /* bunlti_d */
-    12,        /* bunler_d */
-    24,        /* bunlei_d */
-    12,        /* buneqr_d */
-    24,        /* buneqi_d */
-    12,        /* bunger_d */
-    24,        /* bungei_d */
-    12,        /* bungtr_d */
-    24,        /* bungti_d */
-    12,        /* bltgtr_d */
-    24,        /* bltgti_d */
-    12,        /* bordr_d */
-    24,        /* bordi_d */
-    12,        /* bunordr_d */
-    24,        /* bunordi_d */
-    0, /* pushargr_d */
-    0, /* pushargi_d */
-    0, /* retr_d */
-    0, /* reti_d */
-    0, /* retval_d */
-    0, /* movr_w_f */
-    0, /* movr_ww_d */
-    0, /* movr_w_d */
-    0, /* movr_f_w */
-    0, /* movi_f_w */
-    0, /* movr_d_ww */
-    0, /* movi_d_ww */
-    4, /* movr_d_w */
-    12,        /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* NEW_ABI */
-#endif /* __WORDSIZE */
-
-#if __WORDSIZE == 32
-#if !NEW_ABI
 #define JIT_INSTR_MAX 116
     0, /* data */
     0, /* live */
 #define JIT_INSTR_MAX 116
     0, /* data */
     0, /* live */
-    0, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     20,        /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     20,        /* va_arg_d */
     8, /* remr_u */
     16,        /* remi_u */
     4, /* andr */
     8, /* remr_u */
     16,        /* remi_u */
     4, /* andr */
-    12,        /* andi */
+    8, /* andi */
     4, /* orr */
     12,        /* ori */
     4, /* xorr */
     4, /* orr */
     12,        /* ori */
     4, /* xorr */
     4, /* ltr_u */
     4, /* lti_u */
     8, /* ler */
     4, /* ltr_u */
     4, /* lti_u */
     8, /* ler */
-    12,        /* lei */
+    4, /* lei */
     8, /* ler_u */
     8, /* ler_u */
-    12,        /* lei_u */
-    12,        /* eqr */
-    12,        /* eqi */
+    4, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
     8, /* ger */
     8, /* ger */
-    12,        /* gei */
+    8, /* gei */
     8, /* ger_u */
     8, /* ger_u */
-    12,        /* gei_u */
+    8, /* gei_u */
     4, /* gtr */
     8, /* gti */
     4, /* gtr_u */
     4, /* gtr */
     8, /* gti */
     4, /* gtr_u */
     8, /* movi */
     4, /* movnr */
     4, /* movzr */
     8, /* movi */
     4, /* movnr */
     4, /* movzr */
-    8, /* extr_c */
+    36,        /* casr */
+    44,        /* casi */
+    4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_uc */
-    8, /* extr_s */
+    4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
-    20,        /* htonr_us */
-    52,        /* htonr_ui */
+    8, /* bswapr_us */
+    8, /* bswapr_ui */
+    0, /* bswapr_ul */
+    4, /* htonr_us */
+    4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* ldr_c */
     12,        /* ldi_c */
     0, /* htonr_ul */
     4, /* ldr_c */
     12,        /* ldi_c */
     20,        /* bxsubi_u */
     8, /* jmpr */
     8, /* jmpi */
     20,        /* bxsubi_u */
     8, /* jmpr */
     8, /* jmpi */
-    12,        /* callr */
+    8, /* callr */
     16,        /* calli */
     0, /* prepare */
     16,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     8, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     8, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* NEW_ABI */
+    8, /* clo */
+    8, /* clz */
+    76,        /* cto */
+    76,        /* ctz */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 76
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    4, /* align */
+    24,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    44,        /* prolog */
+    76,        /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    0, /* va_start */
-    0, /* va_arg */
-    0, /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     28,        /* addi */
     0, /* va_end */
     4, /* addr */
     28,        /* addi */
     36,        /* subci */
     28,        /* subxr */
     28,        /* subxi */
     36,        /* subci */
     28,        /* subxr */
     28,        /* subxi */
-    32,        /* rsbi */
+    36,        /* rsbi */
     8, /* mulr */
     32,        /* muli */
     12,        /* qmulr */
     8, /* mulr */
     32,        /* muli */
     12,        /* qmulr */
     8, /* remr_u */
     32,        /* remi_u */
     4, /* andr */
     8, /* remr_u */
     32,        /* remi_u */
     4, /* andr */
-    28,        /* andi */
+    8, /* andi */
     4, /* orr */
     28,        /* ori */
     4, /* xorr */
     4, /* orr */
     28,        /* ori */
     4, /* xorr */
     4, /* ltr_u */
     4, /* lti_u */
     8, /* ler */
     4, /* ltr_u */
     4, /* lti_u */
     8, /* ler */
-    12,        /* lei */
+    4, /* lei */
     8, /* ler_u */
     8, /* ler_u */
-    12,        /* lei_u */
-    12,        /* eqr */
-    12,        /* eqi */
+    4, /* lei_u */
+    8, /* eqr */
+    8, /* eqi */
     8, /* ger */
     8, /* ger */
-    12,        /* gei */
+    8, /* gei */
     8, /* ger_u */
     8, /* ger_u */
-    12,        /* gei_u */
+    8, /* gei_u */
     4, /* gtr */
     8, /* gti */
     4, /* gtr_u */
     4, /* gtr */
     8, /* gti */
     4, /* gtr_u */
     28,        /* movi */
     4, /* movnr */
     4, /* movzr */
     28,        /* movi */
     4, /* movnr */
     4, /* movzr */
-    8, /* extr_c */
+    36,        /* casr */
+    56,        /* casi */
+    4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_uc */
-    8, /* extr_s */
+    4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_us */
     4, /* extr_i */
-    8, /* extr_ui */
+    4, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
-    12,        /* ldi_c */
+    24,        /* ldi_c */
     4, /* ldr_uc */
     4, /* ldr_uc */
-    12,        /* ldi_uc */
+    24,        /* ldi_uc */
     4, /* ldr_s */
     4, /* ldr_s */
-    12,        /* ldi_s */
+    24,        /* ldi_s */
     4, /* ldr_us */
     4, /* ldr_us */
-    12,        /* ldi_us */
+    24,        /* ldi_us */
     4, /* ldr_i */
     4, /* ldr_i */
-    12,        /* ldi_i */
+    24,        /* ldi_i */
     4, /* ldr_ui */
     4, /* ldr_ui */
-    12,        /* ldi_ui */
+    24,        /* ldi_ui */
     4, /* ldr_l */
     4, /* ldr_l */
-    12,        /* ldi_l */
+    24,        /* ldi_l */
     8, /* ldxr_c */
     8, /* ldxr_c */
-    4, /* ldxi_c */
+    16,        /* ldxi_c */
     8, /* ldxr_uc */
     8, /* ldxr_uc */
-    4, /* ldxi_uc */
+    16,        /* ldxi_uc */
     8, /* ldxr_s */
     8, /* ldxr_s */
-    4, /* ldxi_s */
+    16,        /* ldxi_s */
     8, /* ldxr_us */
     8, /* ldxr_us */
-    4, /* ldxi_us */
+    16,        /* ldxi_us */
     8, /* ldxr_i */
     8, /* ldxr_i */
-    4, /* ldxi_i */
+    16,        /* ldxi_i */
     8, /* ldxr_ui */
     8, /* ldxr_ui */
-    4, /* ldxi_ui */
+    16,        /* ldxi_ui */
     8, /* ldxr_l */
     8, /* ldxr_l */
-    4, /* ldxi_l */
+    16,        /* ldxi_l */
     4, /* str_c */
     4, /* str_c */
-    12,        /* sti_c */
+    24,        /* sti_c */
     4, /* str_s */
     4, /* str_s */
-    12,        /* sti_s */
+    24,        /* sti_s */
     4, /* str_i */
     4, /* str_i */
-    12,        /* sti_i */
+    24,        /* sti_i */
     4, /* str_l */
     4, /* str_l */
-    12,        /* sti_l */
+    24,        /* sti_l */
     8, /* stxr_c */
     8, /* stxr_c */
-    4, /* stxi_c */
+    16,        /* stxi_c */
     8, /* stxr_s */
     8, /* stxr_s */
-    4, /* stxi_s */
+    16,        /* stxi_s */
     8, /* stxr_i */
     8, /* stxr_i */
-    4, /* stxi_i */
+    16,        /* stxi_i */
     8, /* stxr_l */
     8, /* stxr_l */
-    4, /* stxi_l */
+    16,        /* stxi_l */
     12,        /* bltr */
     12,        /* blti */
     12,        /* bltr_u */
     12,        /* bltr */
     12,        /* blti */
     12,        /* bltr_u */
     12,        /* bgtr_u */
     16,        /* bgti_u */
     8, /* bner */
     12,        /* bgtr_u */
     16,        /* bgti_u */
     8, /* bner */
-    32,        /* bnei */
+    28,        /* bnei */
     12,        /* bmsr */
     12,        /* bmsi */
     12,        /* bmcr */
     12,        /* bmsr */
     12,        /* bmsi */
     12,        /* bmcr */
     28,        /* bxsubi */
     16,        /* bxsubr_u */
     20,        /* bxsubi_u */
     28,        /* bxsubi */
     16,        /* bxsubr_u */
     20,        /* bxsubi_u */
-    0, /* jmpr */
+    8, /* jmpr */
     8, /* jmpi */
     8, /* jmpi */
-    12,        /* callr */
+    8, /* callr */
     32,        /* calli */
     0, /* prepare */
     32,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
-    44,        /* epilog */
+    76,        /* epilog */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     0, /* putargi_f */
     4, /* addr_f */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     0, /* putargi_f */
     4, /* addr_f */
-    16,        /* addi_f */
+    28,        /* addi_f */
     4, /* subr_f */
     4, /* subr_f */
-    16,        /* subi_f */
-    16,        /* rsbi_f */
+    28,        /* subi_f */
+    28,        /* rsbi_f */
     4, /* mulr_f */
     4, /* mulr_f */
-    16,        /* muli_f */
+    28,        /* muli_f */
     4, /* divr_f */
     4, /* divr_f */
-    16,        /* divi_f */
+    28,        /* divi_f */
     4, /* negr_f */
     4, /* absr_f */
     4, /* sqrtr_f */
     16,        /* ltr_f */
     4, /* negr_f */
     4, /* absr_f */
     4, /* sqrtr_f */
     16,        /* ltr_f */
-    28,        /* lti_f */
+    40,        /* lti_f */
     16,        /* ler_f */
     16,        /* ler_f */
-    28,        /* lei_f */
+    40,        /* lei_f */
     16,        /* eqr_f */
     16,        /* eqr_f */
-    28,        /* eqi_f */
+    40,        /* eqi_f */
     16,        /* ger_f */
     16,        /* ger_f */
-    28,        /* gei_f */
+    40,        /* gei_f */
     16,        /* gtr_f */
     16,        /* gtr_f */
-    28,        /* gti_f */
+    40,        /* gti_f */
     16,        /* ner_f */
     16,        /* ner_f */
-    28,        /* nei_f */
+    40,        /* nei_f */
     16,        /* unltr_f */
     16,        /* unltr_f */
-    28,        /* unlti_f */
+    40,        /* unlti_f */
     16,        /* unler_f */
     16,        /* unler_f */
-    28,        /* unlei_f */
+    40,        /* unlei_f */
     16,        /* uneqr_f */
     16,        /* uneqr_f */
-    28,        /* uneqi_f */
+    40,        /* uneqi_f */
     16,        /* unger_f */
     16,        /* unger_f */
-    28,        /* ungei_f */
+    40,        /* ungei_f */
     16,        /* ungtr_f */
     16,        /* ungtr_f */
-    28,        /* ungti_f */
+    40,        /* ungti_f */
     16,        /* ltgtr_f */
     16,        /* ltgtr_f */
-    28,        /* ltgti_f */
+    40,        /* ltgti_f */
     16,        /* ordr_f */
     16,        /* ordr_f */
-    28,        /* ordi_f */
+    40,        /* ordi_f */
     16,        /* unordr_f */
     16,        /* unordr_f */
-    28,        /* unordi_f */
+    40,        /* unordi_f */
     8, /* truncr_f_i */
     8, /* truncr_f_l */
     8, /* extr_f */
     4, /* extr_d_f */
     4, /* movr_f */
     8, /* truncr_f_i */
     8, /* truncr_f_l */
     8, /* extr_f */
     4, /* extr_d_f */
     4, /* movr_f */
-    12,        /* movi_f */
+    24,        /* movi_f */
     4, /* ldr_f */
     4, /* ldr_f */
-    12,        /* ldi_f */
+    24,        /* ldi_f */
     8, /* ldxr_f */
     8, /* ldxr_f */
-    4, /* ldxi_f */
+    16,        /* ldxi_f */
     4, /* str_f */
     4, /* str_f */
-    12,        /* sti_f */
+    24,        /* sti_f */
     8, /* stxr_f */
     8, /* stxr_f */
-    4, /* stxi_f */
+    16,        /* stxi_f */
     12,        /* bltr_f */
     12,        /* bltr_f */
-    24,        /* blti_f */
+    36,        /* blti_f */
     12,        /* bler_f */
     12,        /* bler_f */
-    24,        /* blei_f */
+    36,        /* blei_f */
     12,        /* beqr_f */
     12,        /* beqr_f */
-    24,        /* beqi_f */
+    36,        /* beqi_f */
     12,        /* bger_f */
     12,        /* bger_f */
-    24,        /* bgei_f */
+    36,        /* bgei_f */
     12,        /* bgtr_f */
     12,        /* bgtr_f */
-    24,        /* bgti_f */
+    36,        /* bgti_f */
     12,        /* bner_f */
     12,        /* bner_f */
-    24,        /* bnei_f */
+    36,        /* bnei_f */
     12,        /* bunltr_f */
     12,        /* bunltr_f */
-    24,        /* bunlti_f */
+    36,        /* bunlti_f */
     12,        /* bunler_f */
     12,        /* bunler_f */
-    24,        /* bunlei_f */
+    36,        /* bunlei_f */
     12,        /* buneqr_f */
     12,        /* buneqr_f */
-    24,        /* buneqi_f */
+    36,        /* buneqi_f */
     12,        /* bunger_f */
     12,        /* bunger_f */
-    24,        /* bungei_f */
+    36,        /* bungei_f */
     12,        /* bungtr_f */
     12,        /* bungtr_f */
-    24,        /* bungti_f */
+    36,        /* bungti_f */
     12,        /* bltgtr_f */
     12,        /* bltgtr_f */
-    24,        /* bltgti_f */
+    36,        /* bltgti_f */
     12,        /* bordr_f */
     12,        /* bordr_f */
-    24,        /* bordi_f */
+    36,        /* bordi_f */
     12,        /* bunordr_f */
     12,        /* bunordr_f */
-    24,        /* bunordi_f */
+    36,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* putargr_d */
     0, /* putargi_d */
     4, /* addr_d */
     0, /* putargr_d */
     0, /* putargi_d */
     4, /* addr_d */
-    16,        /* addi_d */
+    28,        /* addi_d */
     4, /* subr_d */
     4, /* subr_d */
-    16,        /* subi_d */
-    16,        /* rsbi_d */
+    28,        /* subi_d */
+    28,        /* rsbi_d */
     4, /* mulr_d */
     4, /* mulr_d */
-    16,        /* muli_d */
+    28,        /* muli_d */
     4, /* divr_d */
     4, /* divr_d */
-    16,        /* divi_d */
+    28,        /* divi_d */
     4, /* negr_d */
     4, /* absr_d */
     4, /* sqrtr_d */
     16,        /* ltr_d */
     4, /* negr_d */
     4, /* absr_d */
     4, /* sqrtr_d */
     16,        /* ltr_d */
-    28,        /* lti_d */
+    44,        /* lti_d */
     16,        /* ler_d */
     16,        /* ler_d */
-    28,        /* lei_d */
+    44,        /* lei_d */
     16,        /* eqr_d */
     16,        /* eqr_d */
-    28,        /* eqi_d */
+    44,        /* eqi_d */
     16,        /* ger_d */
     16,        /* ger_d */
-    28,        /* gei_d */
+    44,        /* gei_d */
     16,        /* gtr_d */
     16,        /* gtr_d */
-    28,        /* gti_d */
+    44,        /* gti_d */
     16,        /* ner_d */
     16,        /* ner_d */
-    28,        /* nei_d */
+    44,        /* nei_d */
     16,        /* unltr_d */
     16,        /* unltr_d */
-    28,        /* unlti_d */
+    44,        /* unlti_d */
     16,        /* unler_d */
     16,        /* unler_d */
-    28,        /* unlei_d */
+    44,        /* unlei_d */
     16,        /* uneqr_d */
     16,        /* uneqr_d */
-    28,        /* uneqi_d */
+    44,        /* uneqi_d */
     16,        /* unger_d */
     16,        /* unger_d */
-    28,        /* ungei_d */
+    44,        /* ungei_d */
     16,        /* ungtr_d */
     16,        /* ungtr_d */
-    28,        /* ungti_d */
+    44,        /* ungti_d */
     16,        /* ltgtr_d */
     16,        /* ltgtr_d */
-    28,        /* ltgti_d */
+    44,        /* ltgti_d */
     16,        /* ordr_d */
     16,        /* ordr_d */
-    28,        /* ordi_d */
+    44,        /* ordi_d */
     16,        /* unordr_d */
     16,        /* unordr_d */
-    28,        /* unordi_d */
+    44,        /* unordi_d */
     8, /* truncr_d_i */
     8, /* truncr_d_l */
     8, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
     8, /* truncr_d_i */
     8, /* truncr_d_l */
     8, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    12,        /* movi_d */
+    28,        /* movi_d */
     4, /* ldr_d */
     4, /* ldr_d */
-    12,        /* ldi_d */
+    24,        /* ldi_d */
     8, /* ldxr_d */
     8, /* ldxr_d */
-    4, /* ldxi_d */
+    16,        /* ldxi_d */
     4, /* str_d */
     4, /* str_d */
-    12,        /* sti_d */
+    24,        /* sti_d */
     8, /* stxr_d */
     8, /* stxr_d */
-    4, /* stxi_d */
+    16,        /* stxi_d */
     12,        /* bltr_d */
     12,        /* bltr_d */
-    24,        /* blti_d */
+    36,        /* blti_d */
     12,        /* bler_d */
     12,        /* bler_d */
-    24,        /* blei_d */
+    36,        /* blei_d */
     12,        /* beqr_d */
     12,        /* beqr_d */
-    24,        /* beqi_d */
+    36,        /* beqi_d */
     12,        /* bger_d */
     12,        /* bger_d */
-    24,        /* bgei_d */
+    36,        /* bgei_d */
     12,        /* bgtr_d */
     12,        /* bgtr_d */
-    24,        /* bgti_d */
+    36,        /* bgti_d */
     12,        /* bner_d */
     12,        /* bner_d */
-    24,        /* bnei_d */
+    40,        /* bnei_d */
     12,        /* bunltr_d */
     12,        /* bunltr_d */
-    24,        /* bunlti_d */
+    40,        /* bunlti_d */
     12,        /* bunler_d */
     12,        /* bunler_d */
-    24,        /* bunlei_d */
+    40,        /* bunlei_d */
     12,        /* buneqr_d */
     12,        /* buneqr_d */
-    24,        /* buneqi_d */
+    40,        /* buneqi_d */
     12,        /* bunger_d */
     12,        /* bunger_d */
-    24,        /* bungei_d */
+    40,        /* bungei_d */
     12,        /* bungtr_d */
     12,        /* bungtr_d */
-    24,        /* bungti_d */
+    40,        /* bungti_d */
     12,        /* bltgtr_d */
     12,        /* bltgtr_d */
-    24,        /* bltgti_d */
+    36,        /* bltgti_d */
     12,        /* bordr_d */
     12,        /* bordr_d */
-    24,        /* bordi_d */
+    36,        /* bordi_d */
     12,        /* bunordr_d */
     12,        /* bunordr_d */
-    24,        /* bunordi_d */
+    40,        /* bunordi_d */
     0, /* pushargr_d */
     0, /* pushargi_d */
     0, /* retr_d */
     0, /* pushargr_d */
     0, /* pushargi_d */
     0, /* retr_d */
     0, /* movr_d_ww */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     0, /* movr_d_ww */
     0, /* movi_d_ww */
     4, /* movr_d_w */
-    12,        /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    116,       /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
+    24,        /* movi_d_w */
+    4, /* clo */
+    4, /* clz */
+    72,        /* cto */
+    72,        /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index d98d94e..6d56423 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
 #  include <sys/cachectl.h>
 #endif
 
 #  include <sys/cachectl.h>
 #endif
 
+#if NEW_ABI
+/*   callee save                                   + variadic arguments
+ *   align16(ra+fp+s[0-7]+f20+f22+f24+f26+f28+f30) + align16(a[0-7]) */
+#  define stack_framesize              (128 + 64)
+#else
+/*   callee save
+ *   align16(ra+fp+s[0-7]+f16+f18+f20+f22+f24+f26+f28+f30) */
+#  define stack_framesize              128
+#endif
+
 #if NEW_ABI
 #  define NUM_WORD_ARGS                        8
 #  define STACK_SLOT                   8
 #if NEW_ABI
 #  define NUM_WORD_ARGS                        8
 #  define STACK_SLOT                   8
@@ -54,12 +64,14 @@ typedef struct jit_pointer_t jit_va_list_t;
 /*
  * Prototypes
  */
 /*
  * Prototypes
  */
-#define jit_make_arg(node)             _jit_make_arg(_jit,node)
-static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*);
+#define jit_make_arg(node,code)                _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
 #define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
 static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
 #define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
 static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
 #define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
 static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
 #define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
 static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
@@ -67,11 +79,13 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
  * Initialization
  */
 #undef PROTO
 
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x01,                  "at" },
     { rc(gpr) | 0x02,                  "v0" },
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x01,                  "at" },
     { rc(gpr) | 0x02,                  "v0" },
@@ -145,12 +159,49 @@ jit_register_t            _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7
+};
+
+static jit_int32_t fregs[] = {
+#if !NEW_ABI
+    _F16, _F18,
+#endif
+    _F20, _F22, _F24, _F26, _F28, _F30
+};
+
 /*
  * Implementation
  */
 void
 jit_get_cpu(void)
 {
 /*
  * Implementation
  */
 void
 jit_get_cpu(void)
 {
+#if defined(__linux__)
+    FILE       *fp;
+    char       *ptr;
+    char        buf[128];
+
+    if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
+       while (fgets(buf, sizeof(buf), fp)) {
+           if (strncmp(buf, "isa                       : ", 8) == 0) {
+               if ((ptr = strstr(buf + 9, "mips64r")))
+                   jit_cpu.release = strtoul(ptr + 7, NULL, 10);
+               break;
+           }
+       }
+       fclose(fp);
+    }
+#endif
+#if __mips_isa_rev
+    if (!jit_cpu.release)
+       jit_cpu.release = __mips_isa_rev;
+#elif defined _MIPS_ARCH
+    if (!jit_cpu.release)
+       jit_cpu.release = strtoul(&_MIPS_ARCH[4], NULL, 10);
+#elif defined(__mips) && __mips < 6
+    if (!jit_cpu.release)
+       jit_cpu.release = __mips;
+#endif
 }
 
 void
 }
 
 void
@@ -211,6 +262,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -259,20 +311,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -332,18 +382,18 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
 #if NEW_ABI
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
 #if NEW_ABI
-    return (jit_arg_reg_p(u->u.w));
+    return (jit_arg_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 #else
     return (u->u.w < 8);
 #endif
 }
 
 static jit_node_t *
 #else
     return (u->u.w < 8);
 #endif
 }
 
 static jit_node_t *
-_jit_make_arg(jit_state_t *_jit, jit_node_t *node)
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
 {
     jit_int32_t                 offset;
 #if NEW_ABI
 {
     jit_int32_t                 offset;
 #if NEW_ABI
@@ -355,13 +405,13 @@ _jit_make_arg(jit_state_t *_jit, jit_node_t *node)
     }
 #else
     offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
     }
 #else
     offset = (_jitc->function->self.size - stack_framesize) >> STACK_SHIFT;
-    _jitc->function->self.argi = 1;
+    ++_jitc->function->self.argi;
     if (offset >= 4)
        offset = _jitc->function->self.size;
     _jitc->function->self.size += STACK_SLOT;
 #endif
     if (node == (jit_node_t *)0)
     if (offset >= 4)
        offset = _jitc->function->self.size;
     _jitc->function->self.size += STACK_SLOT;
 #endif
     if (node == (jit_node_t *)0)
-       node = jit_new_node(jit_code_arg);
+       node = jit_new_node(code);
     else
        link_node(node);
     node->u.w = offset;
     else
        link_node(node);
     node->u.w = offset;
@@ -469,7 +519,6 @@ _jit_ellipsis(jit_state_t *_jit)
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
 #if NEW_ABI
     else {
        assert(!(_jitc->function->self.call & jit_call_varargs));
 #if NEW_ABI
-       /* If varargs start in a register, allocate extra 64 bytes. */
        if (jit_arg_reg_p(_jitc->function->self.argi))
            rewind_prolog();
        /* Do not set during possible rewind. */
        if (jit_arg_reg_p(_jitc->function->self.argi))
            rewind_prolog();
        /* Do not set during possible rewind. */
@@ -482,6 +531,7 @@ _jit_ellipsis(jit_state_t *_jit)
        _jitc->function->vagp = _jitc->function->self.argi;
     }
     jit_inc_synth(ellipsis);
        _jitc->function->vagp = _jitc->function->self.argi;
     }
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare)
        jit_link_prepare();
     else
     if (_jitc->prepare)
        jit_link_prepare();
     else
@@ -498,10 +548,14 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    return (jit_make_arg((jit_node_t*)0));
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
+    return (jit_make_arg((jit_node_t*)0, code));
 }
 
 jit_node_t *
 }
 
 jit_node_t *
@@ -521,55 +575,67 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _A0 - v->u.w);
-    else
-       jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, _FP, v->u.w + C_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _A0 - v->u.w);
-    else
-       jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, _FP, v->u.w + C_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _A0 - v->u.w);
-    else
-       jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, _FP, v->u.w + S_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _A0 - v->u.w);
-    else
-       jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, _FP, v->u.w + S_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 64
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 64
@@ -578,8 +644,11 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr(u, _A0 - v->u.w);
 #endif
     }
        jit_movr(u, _A0 - v->u.w);
 #endif
     }
-    else
-       jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, _FP, v->u.w + I_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -587,52 +656,64 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _A0 - v->u.w);
-    else
-       jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, _FP, v->u.w + I_DISP);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _A0 - v->u.w);
-    else
-       jit_ldxi_l(u, _FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, _FP, v->u.w);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 #endif
 
 void
     jit_dec_synth();
 }
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    jit_inc_synth_wp(putargr, u, v);
-    assert(v->code == jit_code_arg);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_A0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+    else {
+       jit_node_t      *node = jit_stxi(v->u.w + WORD_ADJUST, _FP, u);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_A0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w + WORD_ADJUST, _FP, regno);
+       node = jit_stxi(v->u.w + WORD_ADJUST, _FP, regno);
+       jit_link_alist(node);
+       jit_check_frame();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -647,15 +728,18 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(u, _F12 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(u, _F12 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_w_f(u, _A0 - v->u.w - 8);
+       jit_movr_w_f(u, _A0 - (v->u.w - 8));
 #else
     if (v->u.w < 4)
        jit_movr_w_f(u, _A0 - v->u.w);
     else if (v->u.w < 8)
        jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1));
 #endif
 #else
     if (v->u.w < 4)
        jit_movr_w_f(u, _A0 - v->u.w);
     else if (v->u.w < 8)
        jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1));
 #endif
-    else
-       jit_ldxi_f(u, _FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, _FP, v->u.w);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -668,15 +752,18 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(_F12 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
     if (jit_arg_reg_p(v->u.w))
        jit_movr_f(_F12 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_f_w(_A0 - v->u.w - 8, u);
+       jit_movr_f_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4)
        jit_movr_f_w(_A0 - v->u.w, u);
     else if (v->u.w < 8)
        jit_movr_f(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
 #else
     if (v->u.w < 4)
        jit_movr_f_w(_A0 - v->u.w, u);
     else if (v->u.w < 8)
        jit_movr_f(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
-    else
-       jit_stxi_f(v->u.w, _FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, _FP, u);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -689,12 +776,8 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
 #if NEW_ABI
     if (jit_arg_reg_p(v->u.w))
        jit_movi_f(_F12 - v->u.w, u);
 #if NEW_ABI
     if (jit_arg_reg_p(v->u.w))
        jit_movi_f(_F12 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_f(regno, u);
-       jit_movr_f_w(_A0 - v->u.w - 8, u);
-       jit_unget_reg(regno);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4) {
        regno = jit_get_reg(jit_class_fpr);
 #else
     if (v->u.w < 4) {
        regno = jit_get_reg(jit_class_fpr);
@@ -706,9 +789,12 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
        jit_movi_f(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
     else {
        jit_movi_f(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, _FP, regno);
+       node = jit_stxi_f(v->u.w, _FP, regno);
+       jit_link_alist(node);
+       jit_check_frame();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -723,15 +809,18 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(u, _F12 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(u, _F12 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_d_w(_A0 - v->u.w - 8, u);
+       jit_movr_d_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4)
        jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1));
     else if (v->u.w < 8)
        jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1));
 #endif
 #else
     if (v->u.w < 4)
        jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1));
     else if (v->u.w < 8)
        jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1));
 #endif
-    else
-       jit_ldxi_d(u, _FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, _FP, v->u.w);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -744,15 +833,18 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(_F12 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
     if (jit_arg_reg_p(v->u.w))
        jit_movr_d(_F12 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
-       jit_movr_d_w(_A0 - v->u.w - 8, u);
+       jit_movr_d_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4)
        jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), u);
     else if (v->u.w < 8)
        jit_movr_d(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
 #else
     if (v->u.w < 4)
        jit_movr_d_ww(_A0 - v->u.w, _A0 - (v->u.w + 1), u);
     else if (v->u.w < 8)
        jit_movr_d(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
-    else
-       jit_stxi_d(v->u.w, _FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, _FP, u);
+       jit_link_alist(node);
+       jit_check_frame();
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -765,12 +857,8 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 #if NEW_ABI
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(_F12 - v->u.w, u);
 #if NEW_ABI
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(_F12 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_d(regno, u);
-       jit_movr_d_w(_A0 - v->u.w - 8, u);
-       jit_unget_reg(regno);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_d_w(_A0 - (v->u.w - 8), u);
 #else
     if (v->u.w < 4) {
        regno = jit_get_reg(jit_class_fpr);
 #else
     if (v->u.w < 4) {
        regno = jit_get_reg(jit_class_fpr);
@@ -782,18 +870,21 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
        jit_movi_d(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
     else {
        jit_movi_d(_F12 - ((v->u.w - 4) >> 1), u);
 #endif
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, _FP, regno);
+       node = jit_stxi_d(v->u.w, _FP, regno);
+       jit_link_alist(node);
+       jit_check_frame();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if NEW_ABI
     assert(_jitc->function);
     jit_link_prepare();
 #if NEW_ABI
     assert(_jitc->function);
@@ -802,6 +893,7 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
        jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
@@ -809,25 +901,27 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     jit_word_t         offset;
     assert(_jitc->function);
     offset = _jitc->function->call.size >> STACK_SHIFT;
     jit_word_t         offset;
     assert(_jitc->function);
     offset = _jitc->function->call.size >> STACK_SHIFT;
-    _jitc->function->call.argi = 1;
+    ++_jitc->function->call.argi;
     if (jit_arg_reg_p(offset))
        jit_movr(_A0 - offset, u);
     if (jit_arg_reg_p(offset))
        jit_movr(_A0 - offset, u);
-    else
+    else {
+       jit_check_frame();
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
+    }
     _jitc->function->call.size += STACK_SLOT;
 #endif
     jit_dec_synth();
 }
 
 void
     _jitc->function->call.size += STACK_SLOT;
 #endif
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                regno;
 #if !NEW_ABI
     jit_word_t         offset;
 #endif
     assert(_jitc->function);
 {
     jit_int32_t                regno;
 #if !NEW_ABI
     jit_word_t         offset;
 #endif
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if NEW_ABI
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     jit_link_prepare();
 #if NEW_ABI
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -835,6 +929,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, regno);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size + WORD_ADJUST, JIT_SP, regno);
@@ -847,6 +942,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
     if (jit_arg_reg_p(offset))
        jit_movi(_A0 - offset, u);
     else {
     if (jit_arg_reg_p(offset))
        jit_movi(_A0 - offset, u);
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
@@ -875,6 +971,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
@@ -889,8 +986,10 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
        jit_movr_f_w(_A0 - offset, u);
     }
        ++_jitc->function->call.argi;
        jit_movr_f_w(_A0 - offset, u);
     }
-    else
+    else {
+       jit_check_frame();
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+    }
     _jitc->function->call.size += STACK_SLOT;
 #endif
     jit_dec_synth();
     _jitc->function->call.size += STACK_SLOT;
 #endif
     jit_dec_synth();
@@ -915,6 +1014,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
@@ -933,6 +1033,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_movi_f_w(_A0 - offset, u);
     }
     else {
        jit_movi_f_w(_A0 - offset, u);
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
@@ -962,6 +1063,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += STACK_SLOT;
     }
@@ -982,8 +1084,10 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
            ++_jitc->function->call.argf;
        }
     }
            ++_jitc->function->call.argf;
        }
     }
-    else
+    else {
+       jit_check_frame();
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+    }
     _jitc->function->call.size += sizeof(jit_float64_t);
 #endif
     jit_dec_synth();
     _jitc->function->call.size += sizeof(jit_float64_t);
 #endif
     jit_dec_synth();
@@ -1009,6 +1113,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        ++_jitc->function->call.argi;
     }
     else {
        ++_jitc->function->call.argi;
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
@@ -1033,6 +1138,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        }
     }
     else {
        }
     }
     else {
+       jit_check_frame();
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
@@ -1070,6 +1176,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *call;
     assert(_jitc->function);
 {
     jit_node_t         *call;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -1090,13 +1197,12 @@ jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *call;
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *call;
-    jit_node_t         *node;
     assert(_jitc->function);
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
-    node = jit_movi(_T9, (jit_word_t)i0);
-    call = jit_callr(_T9);
+    call = jit_calli(i0);
     call->v.w = _jitc->function->call.argi;
 #if NEW_ABI
     call->w.w = call->v.w;
     call->v.w = _jitc->function->call.argi;
 #if NEW_ABI
     call->w.w = call->v.w;
@@ -1107,7 +1213,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
     jit_dec_synth();
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
     jit_dec_synth();
-    return (node);
+    return (call);
 }
 
 void
 }
 
 void
@@ -1182,9 +1288,11 @@ _emit_code(jit_state_t *_jit)
     jit_word_t          word;
     jit_int32_t                 value;
     jit_int32_t                 offset;
     jit_word_t          word;
     jit_int32_t                 value;
     jit_int32_t                 offset;
+
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1296,18 +1404,30 @@ _emit_code(jit_state_t *_jit)
        prevw = _jit->pc.w;
 #endif
        value = jit_classify(node->code);
        prevw = _jit->pc.w;
 #endif
        value = jit_classify(node->code);
+#if GET_JIT_SIZE
+       flush();
+#endif
        jit_regarg_set(node, value);
        switch (node->code) {
            case jit_code_align:
                /* Must align to a power of two */
                assert(!(node->u.w & (node->u.w - 1)));
        jit_regarg_set(node, value);
        switch (node->code) {
            case jit_code_align:
                /* Must align to a power of two */
                assert(!(node->u.w & (node->u.w - 1)));
+               flush();
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
+               flush();
+               break;
+           case jit_code_skip:
+               flush();
+               nop((node->u.w + 3) & ~3);
+               flush();
                break;
            case jit_code_note:         case jit_code_name:
                break;
            case jit_code_note:         case jit_code_name:
+               flush();
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
+               flush();
                /* remember label is defined */
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                /* remember label is defined */
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
@@ -1461,6 +1581,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
                break;
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1688,6 +1812,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brf(bunord, _d, 64);
            case jit_code_jmpr:
                case_brr(bunord, _d);
                case_brf(bunord, _d, 64);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1696,16 +1821,24 @@ _emit_code(jit_state_t *_jit)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       jmpi(temp->u.w);
+                       jmpi(temp->u.w, 0);
                    else {
                    else {
-                       word = jmpi(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_mips2_p() && can_relative_jump_p(word))
+                           word = jmpi(_jit->pc.w, 1);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
-                   jmpi(node->u.w);
+               else {
+                   jit_check_frame();
+                   jmpi(node->u.w, 0);
+               }
                break;
            case jit_code_callr:
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -1713,23 +1846,37 @@ _emit_code(jit_state_t *_jit)
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli_p(temp->u.w);
-                   if (!(temp->flag & jit_flag_patch))
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w, 0);
+                   else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (jit_mips2_p() && can_relative_jump_p(word))
+                           word = calli(_jit->pc.w, 1);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                        patch(word, node);
+                   }
+               }
+               else {
+                   jit_check_frame();
+                   calli(node->u.w, 0);
                }
                }
-               else
-                   calli(node->u.w);
                break;
            case jit_code_prolog:
                break;
            case jit_code_prolog:
+               flush();
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
                _jitc->again = 0;
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
                _jitc->again = 0;
+               compute_framesize();
+               patch_alist(0);
                prolog(node);
                break;
            case jit_code_epilog:
                prolog(node);
                break;
            case jit_code_epilog:
@@ -1744,13 +1891,29 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* this will be recomputed, but undo anyway to have it
+                    * better self-documented. */
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
                    goto restart_function;
                }
                /* remember label is defined */
+               flush();
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                epilog(node);
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                epilog(node);
@@ -1798,14 +1961,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1815,10 +1990,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1848,6 +2039,9 @@ _emit_code(jit_state_t *_jit)
                    break;
            }
        }
                    break;
            }
        }
+#if GET_JIT_SIZE
+       flush();
+#endif
        jit_regarg_clr(node, value);
        assert(_jitc->regarg == 0 ||
               (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
        jit_regarg_clr(node, value);
        assert(_jitc->regarg == 0 ||
               (jit_carry != _NOREG && _jitc->regarg == (1 << jit_carry)));
@@ -1855,6 +2049,7 @@ _emit_code(jit_state_t *_jit)
        /* update register live state */
        jit_reglive(node);
     }
        /* update register live state */
        jit_reglive(node);
     }
+    flush();
 #undef case_brf
 #undef case_brw
 #undef case_brr
 #undef case_brf
 #undef case_brw
 #undef case_brr
@@ -1881,6 +2076,7 @@ _emit_code(jit_state_t *_jit)
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
 #  include "jit_rewind.c"
 #  include "jit_mips-cpu.c"
 #  include "jit_mips-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
 #undef CODE
 
 void
@@ -1920,6 +2116,29 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void
+_compute_framesize(jit_state_t *_jit)
+{ /* Computes _jitc->framesize for _jitc->function; the result is a multiple of 16. */
+    jit_int32_t                reg; /* index into iregs[], then into fregs[] */
+    _jitc->framesize = STACK_SLOT << 1;        /* ra+fp: starts at two stack slots */
+    for (reg = 0; reg < jit_size(iregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += STACK_SLOT; /* one slot per iregs[] entry set in the function's regset */
+
+    for (reg = 0; reg < jit_size(fregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t); /* one double per fregs[] entry set in the function's regset */
+
+#if NEW_ABI
+    /* Space to store variadic arguments: only when jit_call_varargs is set,
+     * (NUM_WORD_ARGS - vagp) slots are added */
+    if (_jitc->function->self.call & jit_call_varargs)
+       _jitc->framesize += (NUM_WORD_ARGS - _jitc->function->vagp) * STACK_SLOT;
+#endif
+
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16; /* round up to the next multiple of 16 */
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index b663b67..e5985a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2014-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2014-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -21,17 +21,27 @@ static char *code_name[] = {
     "data",
     "live",            "align",
     "save",            "load",
     "data",
     "live",            "align",
     "save",            "load",
+    "skip",
     "#name",           "#note",
     "label",
     "prolog",
     "ellipsis",                "va_push",
     "allocai",         "allocar",
     "#name",           "#note",
     "label",
     "prolog",
     "ellipsis",                "va_push",
     "allocai",         "allocar",
-    "arg",
+    "arg_c",
+    "arg_s",
+    "arg_i",
+    "arg_l",
     "getarg_c",                "getarg_uc",
     "getarg_s",                "getarg_us",
     "getarg_i",                "getarg_ui",
     "getarg_l",
     "getarg_c",                "getarg_uc",
     "getarg_s",                "getarg_us",
     "getarg_i",                "getarg_ui",
     "getarg_l",
-    "putargr",         "putargi",
+    "putargr_c",       "putargi_c",
+    "putargr_uc",      "putargi_uc",
+    "putargr_s",       "putargi_s",
+    "putargr_us",      "putargi_us",
+    "putargr_i",       "putargi_i",
+    "putargr_ui",      "putargi_ui",
+    "putargr_l",       "putargi_l",
     "va_start",
     "va_arg",          "va_arg_d",
     "va_end",
     "va_start",
     "va_arg",          "va_arg_d",
     "va_end",
@@ -70,9 +80,12 @@ static char *code_name[] = {
     "ner",             "nei",
     "movr",            "movi",
     "movnr",           "movzr",
     "ner",             "nei",
     "movr",            "movi",
     "movnr",           "movzr",
+    "casr",            "casi",
     "extr_c",          "extr_uc",
     "extr_s",          "extr_us",
     "extr_i",          "extr_ui",
     "extr_c",          "extr_uc",
     "extr_s",          "extr_us",
     "extr_i",          "extr_ui",
+    "bswapr_us",
+    "bswapr_ui",       "bswapr_ul",
     "htonr_us",
     "htonr_ui",                "htonr_ul",
     "ldr_c",           "ldi_c",
     "htonr_us",
     "htonr_ui",                "htonr_ul",
     "ldr_c",           "ldi_c",
@@ -120,10 +133,22 @@ static char *code_name[] = {
     "jmpr",            "jmpi",
     "callr",           "calli",
     "prepare",
     "jmpr",            "jmpi",
     "callr",           "calli",
     "prepare",
-    "pushargr",                "pushargi",
+    "pushargr_c",      "pushargi_c",
+    "pushargr_uc",     "pushargi_uc",
+    "pushargr_s",      "pushargi_s",
+    "pushargr_us",     "pushargi_us",
+    "pushargr_i",      "pushargi_i",
+    "pushargr_ui",     "pushargi_ui",
+    "pushargr_l",      "pushargi_l",
     "finishr",         "finishi",
     "ret",
     "finishr",         "finishi",
     "ret",
-    "retr",            "reti",
+    "retr_c",          "reti_c",
+    "retr_uc",         "reti_uc",
+    "retr_s",          "reti_s",
+    "retr_us",         "reti_us",
+    "retr_i",          "reti_i",
+    "retr_ui",         "reti_ui",
+    "retr_l",          "reti_l",
     "retval_c",                "retval_uc",
     "retval_s",                "retval_us",
     "retval_i",                "retval_ui",
     "retval_c",                "retval_uc",
     "retval_s",                "retval_us",
     "retval_i",                "retval_ui",
@@ -228,7 +253,6 @@ static char *code_name[] = {
     "movr_f_w",                "movi_f_w",
     "movr_d_ww",       "movi_d_ww",
     "movr_d_w",                "movi_d_w",
     "movr_f_w",                "movi_f_w",
     "movr_d_ww",       "movi_d_ww",
     "movr_d_w",                "movi_d_w",
-    "bswapr_us",
-    "bswapr_ui",               "bswapr_ul",
-    "casr",            "casi",
+    "clo",             "clz",
+    "cto",             "ctz",
 };
 };
index f1c149f..b055619 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index f205db0..67874c6 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -202,8 +202,21 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
 #  define XCMPLI(cr,l,a,u)             FCI(10,cr,l,a,u)
 #  define CMPLDI(a,s)                  XCMPLI(0,1,a,s)
 #  define CMPLWI(a,s)                  XCMPLI(0,0,a,s)
 #  define XCMPLI(cr,l,a,u)             FCI(10,cr,l,a,u)
 #  define CMPLDI(a,s)                  XCMPLI(0,1,a,s)
 #  define CMPLWI(a,s)                  XCMPLI(0,0,a,s)
+#  if __WORDSIZE == 32
+#  define CMPX(a,b)                    CMPW(a,b)
+#  define CMPXI(a,s)                   CMPWI(a,s)
+#  define CMPLX(a,b)                   CMPLW(a,b)
+#  define CMPLXI(a,s)                  CMPLWI(a,s)
+#  else
+#  define CMPX(a,b)                    CMPD(a,b)
+#  define CMPXI(a,s)                   CMPDI(a,s)
+#  define CMPLX(a,b)                   CMPLD(a,b)
+#  define CMPLXI(a,s)                  CMPLDI(a,s)
+#  endif
 #  define CNTLZW(a,s)                  FX(31,s,a,0,26)
 #  define CNTLZW_(a,s)                 FX_(31,s,a,0,26)
 #  define CNTLZW(a,s)                  FX(31,s,a,0,26)
 #  define CNTLZW_(a,s)                 FX_(31,s,a,0,26)
+#  define CNTLZD(a,s)                  FX(31,s,a,0,58)
+#  define CNTLZD_(a,s)                 FX_(31,s,a,0,58)
 #  define CRAND(d,a,b)                 FX(19,d,a,b,257)
 #  define CRANDC(d,a,b)                        FX(19,d,a,b,129)
 #  define CREQV(d,a,b)                 FX(19,d,a,b,289)
 #  define CRAND(d,a,b)                 FX(19,d,a,b,257)
 #  define CRANDC(d,a,b)                        FX(19,d,a,b,129)
 #  define CREQV(d,a,b)                 FX(19,d,a,b,289)
@@ -520,6 +533,19 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  NOT(r0,r1)
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define negr(r0,r1)                  NEG(r0,r1)
 #  define comr(r0,r1)                  NOT(r0,r1)
+#  define bitswap(r0, r1)              _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  if __WORDSIZE == 32
+#    define clzr(r0, r1)               CNTLZW(r0, r1)
+#  else
+#    define clzr(r0, r1)               CNTLZD(r0, r1)
+#  endif
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define extr_c(r0,r1)                        EXTSB(r0,r1)
 #  define extr_uc(r0,r1)               ANDI_(r0,r1,0xff)
 #  define extr_s(r0,r1)                        EXTSH(r0,r1)
 #  define extr_c(r0,r1)                        EXTSB(r0,r1)
 #  define extr_uc(r0,r1)               ANDI_(r0,r1,0xff)
 #  define extr_s(r0,r1)                        EXTSH(r0,r1)
@@ -858,14 +884,14 @@ static jit_word_t _jmpi_p(jit_state_t*,jit_word_t) maybe_unused;
 #    define callr(r0,i0)               _callr(_jit,r0,i0)
 static void _callr(jit_state_t*,jit_int32_t,jit_int32_t);
 #    define calli(i0,i1)               _calli(_jit,i0,i1)
 #    define callr(r0,i0)               _callr(_jit,r0,i0)
 static void _callr(jit_state_t*,jit_int32_t,jit_int32_t);
 #    define calli(i0,i1)               _calli(_jit,i0,i1)
-static void _calli(jit_state_t*,jit_word_t,jit_int32_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t,jit_int32_t);
 #  define calli_p(i0,i1)               _calli_p(_jit,i0,i1)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t);
 #  else
 #    define callr(r0)                  _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #    define calli(i0)                  _calli(_jit,i0)
 #  define calli_p(i0,i1)               _calli_p(_jit,i0,i1)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t,jit_int32_t);
 #  else
 #    define callr(r0)                  _callr(_jit,r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #    define calli(i0)                  _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #    define calli_p(i0)                        _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #endif
 #    define calli_p(i0)                        _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #endif
@@ -1125,7 +1151,7 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPWI(r2, 0);
+    CMPXI(r2, 0);
     BEQ(8);
     MR(r0, r1);
 }
     BEQ(8);
     MR(r0, r1);
 }
@@ -1133,7 +1159,7 @@ _movnr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 static void
 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPWI(r2, 0);
+    CMPXI(r2, 0);
     BNE(8);
     MR(r0, r1);
 }
     BNE(8);
     MR(r0, r1);
 }
@@ -1194,6 +1220,94 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
        jit_unget_reg(r1_reg);
 }
 
        jit_unget_reg(r1_reg);
 }
 
+/* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
+/*
+unsigned int v; // 32-bit word to reverse bit order
+
+// swap odd and even bits
+v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
+// swap consecutive pairs
+v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
+// swap nibbles ... 
+v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
+// swap bytes
+v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
+// swap 2-byte long pairs
+v = ( v >> 16             ) | ( v               << 16);
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2, t3, t4;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);              /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  else
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    bitswap(r0, r1);
+    clzr(r0, r0);
+}
+
 static void
 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
 {
 static void
 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t no_flag)
 {
@@ -1627,7 +1741,7 @@ _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_LT);
 }
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_LT);
 }
@@ -1637,11 +1751,11 @@ _lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     MFCR(r0);
        jit_unget_reg(reg);
     }
     MFCR(r0);
@@ -1675,7 +1789,7 @@ _lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     CRNOT(CR_GT, CR_GT);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_GT);
     CRNOT(CR_GT, CR_GT);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_GT);
@@ -1686,11 +1800,11 @@ _lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     CRNOT(CR_GT, CR_GT);
        jit_unget_reg(reg);
     }
     CRNOT(CR_GT, CR_GT);
@@ -1727,7 +1841,7 @@ _lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_EQ);
 }
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_EQ);
 }
@@ -1737,13 +1851,13 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else if (can_zero_extend_short_p(i0))
        CMPLWI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else if (can_zero_extend_short_p(i0))
        CMPLWI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     MFCR(r0);
        jit_unget_reg(reg);
     }
     MFCR(r0);
@@ -1753,7 +1867,7 @@ _eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     CRNOT(CR_LT, CR_LT);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_LT);
     CRNOT(CR_LT, CR_LT);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_LT);
@@ -1764,11 +1878,11 @@ _gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     CRNOT(CR_LT, CR_LT);
        jit_unget_reg(reg);
     }
     CRNOT(CR_LT, CR_LT);
@@ -1805,7 +1919,7 @@ _gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_GT);
 }
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_GT);
 }
@@ -1815,11 +1929,11 @@ _gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     MFCR(r0);
        jit_unget_reg(reg);
     }
     MFCR(r0);
@@ -1853,7 +1967,7 @@ _gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 static void
 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    CMPW(r1, r2);
+    CMPX(r1, r2);
     CRNOT(CR_EQ, CR_EQ);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_EQ);
     CRNOT(CR_EQ, CR_EQ);
     MFCR(r0);
     EXTRWI(r0, r0, 1, CR_EQ);
@@ -1864,13 +1978,13 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
 {
     jit_int32_t                reg;
     if (can_sign_extend_short_p(i0))
-       CMPWI(r1, i0);
+       CMPXI(r1, i0);
     else if (can_zero_extend_short_p(i0))
        CMPLWI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else if (can_zero_extend_short_p(i0))
        CMPLWI(r1, i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
-       CMPW(r1, rn(reg));
+       CMPX(r1, rn(reg));
        jit_unget_reg(reg);
     }
     CRNOT(CR_EQ, CR_EQ);
        jit_unget_reg(reg);
     }
     CRNOT(CR_EQ, CR_EQ);
@@ -1882,7 +1996,7 @@ static jit_word_t
 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BLT(d);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BLT(d);
@@ -1895,11 +2009,11 @@ _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -1942,7 +2056,7 @@ static jit_word_t
 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BLE(d);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BLE(d);
@@ -1955,11 +2069,11 @@ _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2002,7 +2116,7 @@ static jit_word_t
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BEQ(d);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BEQ(d);
@@ -2015,13 +2129,13 @@ _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else if (can_zero_extend_short_p(i1))
        CMPLWI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
     else if (can_zero_extend_short_p(i1))
        CMPLWI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2034,7 +2148,7 @@ static jit_word_t
 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BGE(d);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BGE(d);
@@ -2047,11 +2161,11 @@ _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2094,7 +2208,7 @@ static jit_word_t
 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BGT(d);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BGT(d);
@@ -2107,11 +2221,11 @@ _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2154,7 +2268,7 @@ static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    CMPW(r0, r1);
+    CMPX(r0, r1);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BNE(d);
     w = _jit->pc.w;
     d = (i0 - w) & ~3;
     BNE(d);
@@ -2167,13 +2281,13 @@ _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
     jit_int32_t                reg;
     jit_word_t         d, w;
     if (can_sign_extend_short_p(i1))
-       CMPWI(r0, i1);
+       CMPXI(r0, i1);
     else if (can_zero_extend_short_p(i1))
        CMPLWI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
     else if (can_zero_extend_short_p(i1))
        CMPLWI(r0, i1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i1);
-       CMPW(r0, rn(reg));
+       CMPX(r0, rn(reg));
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
        jit_unget_reg(reg);
     }
     w = _jit->pc.w;
@@ -2772,7 +2886,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     if (r1 == _R0_REGNO) {
        if (r2 != _R0_REGNO)
     jit_int32_t                reg;
     if (r1 == _R0_REGNO) {
        if (r2 != _R0_REGNO)
-           LWZX(r0, r2, r1);
+           LWAX(r0, r2, r1);
        else {
            reg = jit_get_reg(jit_class_gpr);
            movr(rn(reg), r1);
        else {
            reg = jit_get_reg(jit_class_gpr);
            movr(rn(reg), r1);
@@ -2781,7 +2895,7 @@ _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        }
     }
     else
        }
     }
     else
-       LWZX(r0, r1, r2);
+       LWAX(r0, r1, r2);
 }
 
 static void
 }
 
 static void
@@ -3301,24 +3415,28 @@ _callr(jit_state_t *_jit, jit_int32_t r0
 }
 
 /* assume fixed address or reachable address */
 }
 
 /* assume fixed address or reachable address */
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0
 #  if _CALL_SYSV
        , jit_int32_t varargs
 #  endif
        )
 {
 _calli(jit_state_t *_jit, jit_word_t i0
 #  if _CALL_SYSV
        , jit_int32_t varargs
 #  endif
        )
 {
+    jit_word_t         w;
 #  if _CALL_SYSV
     jit_word_t         d;
     d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
     if (can_sign_extend_jump_p(d)) {
 #  if _CALL_SYSV
     jit_word_t         d;
     d = (i0 - _jit->pc.w - !!varargs * 4) & ~3;
     if (can_sign_extend_jump_p(d)) {
-        /* Tell double arguments were passed in registers. */
-        if (varargs)
-            CREQV(6, 6, 6);
-        BL(d);
-    } else
+       /* Tell double arguments were passed in registers. */
+       if (varargs)
+           CREQV(6, 6, 6);
+       w = _jit->pc.w;
+       BL(d);
+    }
+    else
 #  endif
     {
 #  endif
     {
+       w = _jit->pc.w;
        movi(_R12_REGNO, i0);
        callr(_R12_REGNO
 #  if _CALL_SYSV
        movi(_R12_REGNO, i0);
        callr(_R12_REGNO
 #  if _CALL_SYSV
@@ -3326,6 +3444,7 @@ _calli(jit_state_t *_jit, jit_word_t i0
 #  endif
              );
     }
 #  endif
              );
     }
+    return (w);
 }
 
 /* absolute jump */
 }
 
 /* absolute jump */
@@ -3649,7 +3768,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            if (!can_sign_extend_short_p(d)) {
                /* use absolute address */
                assert(can_sign_extend_short_p(label));
            if (!can_sign_extend_short_p(d)) {
                /* use absolute address */
                assert(can_sign_extend_short_p(label));
-               d |= 2;
+               d = label | 2;
            }
            u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
            break;
            }
            u.i[0] = (u.i[0] & ~0xfffd) | (d & 0xfffe);
            break;
@@ -3677,9 +3796,9 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
            if (!can_sign_extend_jump_p(d)) {
                /* use absolute address */
                assert(can_sign_extend_jump_p(label));
            if (!can_sign_extend_jump_p(d)) {
                /* use absolute address */
                assert(can_sign_extend_jump_p(label));
-               d |= 2;
+               d = label | 2;
            }
            }
-           u.i[0] = (u.i[0] & ~0x3fffffd) | (d & 0x3fffffe);
+           u.i[0] = (u.i[0] & ~0x3fffffc) | (d & 0x3fffffd);
            break;
        case 15:                                        /* LI */
 #if __WORDSIZE == 32
            break;
        case 15:                                        /* LI */
 #if __WORDSIZE == 32
index a2edbd8..12631cd 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index 212e637..c8a4471 100644 (file)
@@ -1,22 +1,26 @@
 #if __WORDSIZE == 32
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
 #if __WORDSIZE == 32
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#if _CALL_SYSV
-#define JIT_INSTR_MAX 124
+#if !_CALL_SYSV
+#define JIT_INSTR_MAX 136
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    0, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    124,       /* prolog */
+    136,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    36,        /* va_start */
-    52,        /* va_arg */
-    64,        /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     8, /* movi */
     12,        /* movnr */
     12,        /* movzr */
     8, /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     16,        /* bxsubi_u */
     8, /* jmpr */
     4, /* jmpi */
     16,        /* bxsubi_u */
     8, /* jmpr */
     4, /* jmpi */
-    12,        /* callr */
-    20,        /* calli */
+    28,        /* callr */
+    36,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     36,        /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
     36,        /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    24,        /* movi_d */
+    28,        /* movi_d */
     4, /* ldr_d */
     8, /* ldi_d */
     4, /* ldxr_d */
     4, /* ldr_d */
     8, /* ldi_d */
     4, /* ldxr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    16,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* _CALL_SYSV */
+    8, /* clo */
+    4, /* clz */
+    136,       /* cto */
+    132,       /* ctz */
+#endif /* !_CALL_SYSV */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #if __WORDSIZE == 32
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
 #if __WORDSIZE == 32
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#if !_CALL_SYSV
+#if _CALL_SYSV
 #define JIT_INSTR_MAX 136
     0, /* data */
     0, /* live */
 #define JIT_INSTR_MAX 136
     0, /* data */
     0, /* live */
-    0, /* align */
+    28,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    136,       /* prolog */
+    124,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    4, /* va_start */
-    8, /* va_arg */
-    8, /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    36,        /* va_start */
+    52,        /* va_arg */
+    64,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     12,        /* remr_u */
     20,        /* remi_u */
     4, /* andr */
     12,        /* remr_u */
     20,        /* remi_u */
     4, /* andr */
-    12,        /* andi */
+    4, /* andi */
     4, /* orr */
     12,        /* ori */
     4, /* xorr */
     4, /* orr */
     12,        /* ori */
     4, /* xorr */
     16,        /* nei */
     4, /* movr */
     8, /* movi */
     16,        /* nei */
     4, /* movr */
     8, /* movi */
-    12,  /* movnr */
-    12,  /* movzr */
+    12,        /* movnr */
+    12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     16,        /* bxsubi_u */
     8, /* jmpr */
     4, /* jmpi */
     16,        /* bxsubi_u */
     8, /* jmpr */
     4, /* jmpi */
-    28,        /* callr */
-    40,        /* calli */
+    12,        /* callr */
+    20,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     24,        /* unordi_f */
     12,        /* truncr_f_i */
     0, /* truncr_f_l */
     24,        /* unordi_f */
     12,        /* truncr_f_i */
     0, /* truncr_f_l */
-    20,        /* extr_f */
+    36,        /* extr_f */
     4, /* extr_d_f */
     4, /* movr_f */
     12,        /* movi_f */
     4, /* extr_d_f */
     4, /* movr_f */
     12,        /* movi_f */
     32,        /* unordi_d */
     12,        /* truncr_d_i */
     0, /* truncr_d_l */
     32,        /* unordi_d */
     12,        /* truncr_d_i */
     0, /* truncr_d_l */
-    20,        /* extr_d */
+    36,        /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    24,        /* movi_d */
+    28,        /* movi_d */
     4, /* ldr_d */
     8, /* ldi_d */
     4, /* ldxr_d */
     4, /* ldr_d */
     8, /* ldi_d */
     4, /* ldxr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    16,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
-#endif /* _CALL_AIX */
-#endif /* __BYTEORDER */
+    8, /* clo */
+    4, /* clz */
+    136,       /* cto */
+    132,       /* ctz */
+#endif /* _CALL_SYSV */
+#endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __BIG_ENDIAN
-#define JIT_INSTR_MAX 148
+#define JIT_INSTR_MAX 236
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    4, /* align */
+    28,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     36,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
     36,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     28,        /* callr */
     52,        /* calli */
     0, /* prepare */
     28,        /* callr */
     52,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    16,        /* bswapr_ui */
-    44,        /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    236,       /* cto */
+    232,       /* ctz */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #if __WORDSIZE == 64
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #if __WORDSIZE == 64
 #if defined(__powerpc__)
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-#define JIT_INSTR_MAX 124
+#define JIT_INSTR_MAX 236
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    4, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     36,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
     36,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    36,        /* casr */
+    44,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    8, /* bswapr_us */
+    16,        /* bswapr_ui */
+    44,        /* bswapr_ul */
     8, /* htonr_us */
     16,        /* htonr_ui */
     44,        /* htonr_ul */
     8, /* htonr_us */
     16,        /* htonr_ui */
     44,        /* htonr_ul */
     12,        /* callr */
     32,        /* calli */
     0, /* prepare */
     12,        /* callr */
     32,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    8, /* bswapr_us */
-    16,        /* bswapr_ui */
-    44,        /* bswapr_ul */
-    36,        /* casr */
-    44,        /* casi */
+    8, /* clo */
+    4, /* clz */
+    236,       /* cto */
+    232,       /* ctz */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
index 5d2b74b..869e876 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -291,20 +291,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -364,7 +362,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -404,12 +402,16 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     jit_bool_t          incr = 1;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     jit_bool_t          incr = 1;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
 #if _CALL_SYSV
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
 #if _CALL_SYSV
@@ -420,7 +422,7 @@ _jit_arg(jit_state_t *_jit)
        offset = _jitc->function->self.size;
     if (incr)
        _jitc->function->self.size += sizeof(jit_word_t);
        offset = _jitc->function->self.size;
     if (incr)
        _jitc->function->self.size += sizeof(jit_word_t);
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -498,7 +500,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
@@ -510,7 +512,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
@@ -522,7 +524,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
@@ -534,7 +536,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
@@ -546,7 +548,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 32
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 32
@@ -564,7 +566,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
@@ -576,7 +578,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
@@ -587,10 +589,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 #endif
 
 void
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
@@ -599,11 +601,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    jit_inc_synth_wp(putargi, u, v);
-    assert(v->code == jit_code_arg);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
@@ -698,11 +700,11 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     jit_bool_t         incr = 1;
     assert(_jitc->function);
 {
     jit_bool_t         incr = 1;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
@@ -719,12 +721,12 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     jit_bool_t          incr = 1;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     jit_bool_t          incr = 1;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -1153,6 +1155,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1293,6 +1296,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1368,6 +1374,10 @@ _emit_code(jit_state_t *_jit)
 #  endif
                case_rr(neg,);
                case_rr(com,);
 #  endif
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
            case jit_code_casr:
                casr(rn(node->u.w), rn(node->v.w),
                     rn(node->w.q.l), rn(node->w.q.h));
            case jit_code_casr:
                casr(rn(node->u.w), rn(node->v.w),
                     rn(node->w.q.l), rn(node->w.q.h));
@@ -1691,7 +1701,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (can_sign_extend_jump_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -1699,36 +1714,45 @@ _emit_code(jit_state_t *_jit)
                    jmpi(node->u.w);
                break;
            case jit_code_callr:
                    jmpi(node->u.w);
                break;
            case jit_code_callr:
-               callr(rn(node->u.w)
 #if _CALL_SYSV
 #if _CALL_SYSV
-                     , !!(node->flag & jit_flag_varargs)
+#  define xcallr(u, v)         callr(u, v)
+#  define xcalli_p(u, v)       calli_p(u, v)
+#  define xcalli(u, v)         calli(u, v)
+#else
+#  define xcallr(u, v)         callr(u)
+#  define xcalli_p(u, v)       calli_p(u)
+#  define xcalli(u, v)         calli(u)
 #endif
 #endif
-                     );
+               xcallr(rn(node->u.w), !!(node->flag & jit_flag_varargs));
                break;
            case jit_code_calli:
                break;
            case jit_code_calli:
+               value = !!(node->flag & jit_flag_varargs);
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                if (node->flag & jit_flag_node) {
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli_p(temp->u.w
+                   if (temp->flag & jit_flag_patch)
+                       xcalli(temp->u.w, value);
+                   else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
 #if _CALL_SYSV
 #if _CALL_SYSV
-                                  , !!(node->flag & jit_flag_varargs)
+                       if (can_sign_extend_jump_p(word + value * 4))
+                           word = xcalli(_jit->pc.w, value);
+                       else
 #endif
 #endif
-                                  );
-                   if (!(temp->flag & jit_flag_patch))
+                           word = xcalli_p(_jit->pc.w, value);
                        patch(word, node);
                        patch(word, node);
+                   }
                }
                else
                }
                else
-                   calli(node->u.w
-#if _CALL_SYSV
-                         , !!(node->flag & jit_flag_varargs)
-#endif
-                         );
+                   xcalli(node->u.w, value);
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1772,6 +1796,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1796,14 +1830,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
            case jit_code_va_arg_d:
                vaarg_d(rn(node->u.w), rn(node->v.w));
                break;
-           case jit_code_live:
-           case jit_code_arg:                  case jit_code_ellipsis:
+           case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1813,10 +1859,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
index a6f9338..f3409fb 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
 
 #if __WORDSIZE == 32
 #  define MININT                0x80000000
 
 #if __WORDSIZE == 32
 #  define MININT                0x80000000
+#  define DEC_FMT              "%d"
+#  define HEX_FMT              "0x%x"
 #else
 #  define MININT                0x8000000000000000
 #else
 #  define MININT                0x8000000000000000
+#  define DEC_FMT              "%ld"
+#  define HEX_FMT              "0x%lx"
 #endif
 
 
 #endif
 
 
 #define print_hex(value)                                               \
     do {                                                               \
        if (value < 0 && value != MININT)                               \
 #define print_hex(value)                                               \
     do {                                                               \
        if (value < 0 && value != MININT)                               \
-           fprintf(print_stream, "-0x%lx", -value);                    \
+           fprintf(print_stream, "-" HEX_FMT, (jit_uword_t)-value);    \
        else                                                            \
        else                                                            \
-           fprintf(print_stream, "0x%lx", value);                      \
+           fprintf(print_stream, HEX_FMT, (jit_uword_t)value);         \
     } while (0)
     } while (0)
-#define print_dec(value)               fprintf(print_stream, "%ld", value)
+#define print_dec(value)               fprintf(print_stream, DEC_FMT, value)
 #define print_flt(value)               fprintf(print_stream, "%g", value)
 #define print_str(value)               fprintf(print_stream, "%s", value)
 #define print_ptr(value)               fprintf(print_stream, "%p", value)
 #define print_flt(value)               fprintf(print_stream, "%g", value)
 #define print_str(value)               fprintf(print_stream, "%s", value)
 #define print_ptr(value)               fprintf(print_stream, "%p", value)
index 89e9491..8da8021 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2015-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2015-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -54,13 +54,9 @@ _rewind_prolog(jit_state_t *_jit)
     _jitc->function->self.size = stack_framesize;
 #if __arm__
     assert(jit_cpu.abi);
     _jitc->function->self.size = stack_framesize;
 #if __arm__
     assert(jit_cpu.abi);
-    _jitc->function->self.size += 64;
-#endif
-#if __mips__ && NEW_ABI
-    /* Only add extra stack space if there are varargs
-     * arguments in registers. */
-    assert(jit_arg_reg_p(_jitc->function->self.argi));
-    _jitc->function->self.size += 64;
+    _jitc->function->alist = NULL;
+#elif __mips__
+    _jitc->function->alist = NULL;
 #endif
     _jitc->function->self.argi =
        _jitc->function->self.argf = _jitc->function->self.argn = 0;
 #endif
     _jitc->function->self.argi =
        _jitc->function->self.argf = _jitc->function->self.argn = 0;
@@ -71,9 +67,10 @@ _rewind_prolog(jit_state_t *_jit)
     for (; node; node = next) {
        next = node->next;
        switch (node->code) {
     for (; node; node = next) {
        next = node->next;
        switch (node->code) {
-           case jit_code_arg:
+           case jit_code_arg_c:        case jit_code_arg_s:
+           case jit_code_arg_i:        case jit_code_arg_l:
                node->next = (jit_node_t *)0;
                node->next = (jit_node_t *)0;
-               jit_make_arg(node);
+               jit_make_arg(node, node->code);
                break;
            case jit_code_arg_f:
                node->next = (jit_node_t *)0;
                break;
            case jit_code_arg_f:
                node->next = (jit_node_t *)0;
index 2ae11b9..4fd35a8 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -93,10 +93,6 @@ typedef union {
 #  undef ui
 } instr_t;
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  undef ui
 } instr_t;
 #  define ii(i)                                *_jit->pc.ui++ = i
-/* FIXME could jit_rewind_prolog() to only use extra 64 bytes
- * if a variadic jit function that have variadic arguments in
- * registers */
-#  define stack_framesize              (200 + 64)
 #  define ldr(r0, r1)                  ldr_l(r0, r1)
 #  define ldi(r0, im)                  ldi_l(r0, im)
 #  define ldxr(r0, r1, r2)             ldxr_l(r0, r1, r2)
 #  define ldr(r0, r1)                  ldr_l(r0, r1)
 #  define ldi(r0, im)                  ldi_l(r0, im)
 #  define ldxr(r0, r1, r2)             ldxr_l(r0, r1, r2)
@@ -579,12 +575,12 @@ static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     JALR(_ZERO_REGNO, r0, 0)
 #  define jmpi(im)                     _jmpi(_jit, im)
 static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     JALR(_ZERO_REGNO, r0, 0)
 #  define jmpi(im)                     _jmpi(_jit, im)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #  define jmpi_p(im)                   _jmpi_p(_jit, im)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    JALR(_RA_REGNO, r0, 0)
 #  define calli(im)                    _calli(_jit, im)
 #  define jmpi_p(im)                   _jmpi_p(_jit, im)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    JALR(_RA_REGNO, r0, 0)
 #  define calli(im)                    _calli(_jit, im)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #  define calli_p(im)          _calli_p(_jit, im)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
 #  define calli_p(im)          _calli_p(_jit, im)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
@@ -2087,12 +2083,13 @@ _bmci(jit_state_t *_jit, jit_word_t br, jit_int32_t r0, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     jit_int32_t                t0;
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
     jit_int32_t                t0;
-    jit_word_t         dsp;
-    dsp = i0 - _jit->pc.w;
+    jit_word_t         dsp, w;
+    w = _jit->pc.w;
+    dsp = i0 - w;
     if (simm20_p(dsp))
        JAL(_ZERO_REGNO, dsp);
     else {
     if (simm20_p(dsp))
        JAL(_ZERO_REGNO, dsp);
     else {
@@ -2101,6 +2098,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
        jmpr(rn(t0));
        jit_unget_reg(t0);
     }
        jmpr(rn(t0));
        jit_unget_reg(t0);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2115,12 +2113,13 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
     jit_int32_t                t0;
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
     jit_int32_t                t0;
-    jit_word_t         dsp;
-    dsp = i0 - _jit->pc.w;
+    jit_word_t         dsp, w;
+    w = _jit->pc.w;
+    dsp = i0 - w;
     if (simm20_p(dsp))
        JAL(_RA_REGNO, dsp);
     else {
     if (simm20_p(dsp))
        JAL(_RA_REGNO, dsp);
     else {
@@ -2129,6 +2128,7 @@ _calli(jit_state_t *_jit, jit_word_t i0)
        callr(rn(t0));
        jit_unget_reg(t0);
     }
        callr(rn(t0));
        jit_unget_reg(t0);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2146,9 +2146,10 @@ _calli_p(jit_state_t *_jit, jit_word_t i0)
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -2159,56 +2160,41 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
     _jitc->function->stack = ((_jitc->function->self.alen -
                              /* align stack at 16 bytes */
                              _jitc->function->self.aoff) + 15) & -16;
-    subi(_SP_REGNO, _SP_REGNO, stack_framesize);
-    stxi(0, _SP_REGNO, _RA_REGNO);
-    stxi(8, _SP_REGNO, _FP_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       stxi(16, _SP_REGNO, 9);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       stxi(24, _SP_REGNO, 18);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       stxi(32, _SP_REGNO, 19);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       stxi(40, _SP_REGNO, 20);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       stxi(48, _SP_REGNO, 21);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       stxi(56, _SP_REGNO, 22);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       stxi(64, _SP_REGNO, 23);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       stxi(72, _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S9))
-       stxi(80, _SP_REGNO, 25);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S10))
-       stxi(88, _SP_REGNO, 26);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S11))
-       stxi(96, _SP_REGNO, 27);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       stxi_d(104, _SP_REGNO, 8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       stxi_d(112, _SP_REGNO, 9);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       stxi_d(120, _SP_REGNO, 18);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       stxi_d(128, _SP_REGNO, 19);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       stxi_d(136, _SP_REGNO, 20);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       stxi_d(144, _SP_REGNO, 21);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       stxi_d(152, _SP_REGNO, 22);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       stxi_d(160, _SP_REGNO, 23);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
-       stxi_d(168, _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
-       stxi_d(176, _SP_REGNO, 25);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
-       stxi_d(184, _SP_REGNO, 26);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
-       stxi_d(192, _SP_REGNO, 27);
-    movr(_FP_REGNO, _SP_REGNO);
+
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_SP_REGNO, _SP_REGNO, jit_framesize());
+    if (_jitc->function->need_frame) {
+       stxi(0, _SP_REGNO, _RA_REGNO);
+       stxi(8, _SP_REGNO, _FP_REGNO);
+    }
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _SP_REGNO, rn(iregs[reg]));
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           stxi_d(offs, _SP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame)
+       movr(_FP_REGNO, _SP_REGNO);
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
     if (_jitc->function->stack)
        subi(_SP_REGNO, _SP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
@@ -2219,7 +2205,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
     }
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
     }
     if (_jitc->function->self.call & jit_call_varargs) {
        for (reg = _jitc->function->vagp; jit_arg_reg_p(reg); ++reg)
-           stxi(stack_framesize - ((8 - reg) * 8),
+           stxi(jit_framesize() - ((8 - reg) * 8),
                 _FP_REGNO, rn(JIT_RA0 - reg));
     }
 }
                 _FP_REGNO, rn(JIT_RA0 - reg));
     }
 }
@@ -2227,58 +2213,31 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->assume_frame)
        return;
-    movr(_SP_REGNO, _FP_REGNO);
-    ldxi(_RA_REGNO, _SP_REGNO, 0);
-    ldxi(_FP_REGNO, _SP_REGNO, 8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S1))
-       ldxi(9, _SP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S2))
-       ldxi(18, _SP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S3))
-       ldxi(19, _SP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S4))
-       ldxi(20, _SP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S5))
-       ldxi(21, _SP_REGNO, 48);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S6))
-       ldxi(22, _SP_REGNO, 56);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S7))
-       ldxi(23, _SP_REGNO, 64);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S8))
-       ldxi(24, _SP_REGNO, 72);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S9))
-       ldxi(25, _SP_REGNO, 80);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S10))
-       ldxi(26, _SP_REGNO, 88);
-    if (jit_regset_tstbit(&_jitc->function->regset, _S11))
-       ldxi(27, _SP_REGNO, 96);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS0))
-       ldxi_d(8, _SP_REGNO, 104);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS1))
-       ldxi_d(9, _SP_REGNO, 112);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS2))
-       ldxi_d(18, _SP_REGNO, 120);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS3))
-       ldxi_d(19, _SP_REGNO, 128);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS4))
-       ldxi_d(20, _SP_REGNO, 136);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS5))
-       ldxi_d(21, _SP_REGNO, 144);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS6))
-       ldxi_d(22, _SP_REGNO, 152);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS7))
-       ldxi_d(23, _SP_REGNO, 160);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS8))
-       ldxi_d(24, _SP_REGNO, 168);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS9))
-       ldxi_d(25, _SP_REGNO, 176);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS10))
-       ldxi_d(26, _SP_REGNO, 184);
-    if (jit_regset_tstbit(&_jitc->function->regset, _FS11))
-       ldxi_d(27, _SP_REGNO, 192);
-    addi(_SP_REGNO, _SP_REGNO, stack_framesize);
+    if (_jitc->function->need_frame) {
+       movr(_SP_REGNO, _FP_REGNO);
+       ldxi(_RA_REGNO, _SP_REGNO, 0);
+       ldxi(_FP_REGNO, _SP_REGNO, 8);
+    }
+
+    /* callee save registers */
+    for (reg = 0, offs = 16; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_word_t);
+       }
+    }
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           ldxi_d(rn(fregs[reg]), _SP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       addi(_SP_REGNO, _SP_REGNO, jit_framesize());
     RET();
 }
 
     RET();
 }
 
@@ -2288,9 +2247,9 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     assert(_jitc->function->self.call & jit_call_varargs);
     /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
     assert(_jitc->function->self.call & jit_call_varargs);
     /* Initialize va_list to the first stack argument. */
     if (jit_arg_reg_p(_jitc->function->vagp))
-       addi(r0, _FP_REGNO, stack_framesize - ((8 - _jitc->function->vagp) * 8));
+       addi(r0, _FP_REGNO, jit_framesize() - ((8 - _jitc->function->vagp) * 8));
     else
     else
-       addi(r0, _FP_REGNO, _jitc->function->self.size);
+       addi(r0, _FP_REGNO, jit_selfsize());
 }
 
 static void
 }
 
 static void
@@ -2333,7 +2292,6 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        }
        else
            abort();
        }
        else
            abort();
-       i.w = u.i[1];
        assert(i.I.opcode == 3 && i.I.funct3 == 3);             /* LD */
     }
 #  else
        assert(i.I.opcode == 3 && i.I.funct3 == 3);             /* LD */
     }
 #  else
index e7884cb..89346e0 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index 8c4cf04..335d3cf 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 64
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 168
     0, /* data */
     0, /* live */
     4, /* align */
     0, /* save */
     0, /* load */
     0, /* data */
     0, /* live */
     4, /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     12,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
     12,        /* movi */
     12,        /* movnr */
     12,        /* movzr */
+    28,        /* casr */
+    40,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     4, /* extr_i */
     8, /* extr_ui */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     4, /* extr_i */
     8, /* extr_ui */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
     20,        /* htonr_us */
     52,        /* htonr_ui */
     116,       /* htonr_ul */
     20,        /* htonr_us */
     52,        /* htonr_ui */
     116,       /* htonr_ul */
     4, /* callr */
     16,        /* calli */
     0, /* prepare */
     4, /* callr */
     16,        /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* movr_w_f */
     0, /* movr_ww_d */
     4, /* movr_w_d */
     4, /* movr_w_f */
     0, /* movr_ww_d */
     4, /* movr_w_d */
-    0, /* movr_f_w */
+    4, /* movr_f_w */
     4, /* movi_f_w */
     0, /* movr_d_ww */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
     4, /* movi_f_w */
     0, /* movr_d_ww */
     0, /* movi_d_ww */
     4, /* movr_d_w */
     12,        /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    116,       /* bswapr_ul */
-    28,        /* casr */
-    40,        /* casi */
+    168,       /* clo */
+    148,       /* clz */
+    168,       /* cto */
+    148,       /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index 8828d4a..63a5cd9 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2019-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2019-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* callee save                                   + variadic arguments
+ * align16(ra+fp+s[1-9]+s10+s11+fs[0-9]+fs10+fs11)+align16(a[0-7]) */
+#define stack_framesize                        (208 + 64)
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
 
@@ -28,6 +32,8 @@ typedef jit_pointer_t jit_va_list_t;
 /*
  * Prototypes
  */
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #if __WORDSIZE == 64
 #  define load_const(r0, i0)           _load_const(_jit, r0, i0)
 static void _load_const(jit_state_t*, jit_int32_t, jit_word_t);
 #if __WORDSIZE == 64
 #  define load_const(r0, i0)           _load_const(_jit, r0, i0)
 static void _load_const(jit_state_t*, jit_int32_t, jit_word_t);
@@ -43,6 +49,7 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
 #define PROTO                          1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
 #undef PROTO
 
 /*
@@ -119,6 +126,14 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+    _S1, _S2, _S3, _S4, _S5, _S6, _S7, _S8, _S9, _S10, _S11
+};
+
+static jit_int32_t fregs[] = {
+    _FS0, _FS1, _FS2, _FS3, _FS4, _FS5, _FS6, _FS7, _FS8, _FS9, _FS10, _FS11
+};
+
 /*
  * Implementation
  */
 /*
  * Implementation
  */
@@ -180,6 +195,7 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -228,20 +244,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -301,16 +315,17 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_f_reg_p(u->u.w));
+    return (jit_arg_f_reg_p(u->u.w) || jit_arg_reg_p(u->u.w - 8));
 }
 
 void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
 }
 
 void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
     if (_jitc->prepare) {
        jit_link_prepare();
        assert(!(_jitc->function->call.call & jit_call_varargs));
@@ -334,19 +349,23 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -368,6 +387,7 @@ _jit_arg_f(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -391,6 +411,7 @@ _jit_arg_d(jit_state_t *_jit)
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -401,111 +422,129 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_c(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_uc(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_s(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_us(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_i(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_i(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
+       node = jit_stxi(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -520,8 +559,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, JIT_FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
        jit_movr_f(u, JIT_FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_f(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_f(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -534,8 +575,10 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(JIT_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
        jit_movr_f(JIT_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_f_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -547,18 +590,14 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(JIT_FA0 - v->u.w, u);
     jit_inc_synth_fp(putargi_f, u, v);
     if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(JIT_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float32_t       f;
-           jit_int32_t         i;
-       } uu;
-       uu.f = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.i);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_f_w(JIT_RA0 - (v->u.w - 8), u);
     else {
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
+       node = jit_stxi_f(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -573,8 +612,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(u, JIT_FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
        jit_movr_d(u, JIT_FA0 - v->u.w);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_w_d(u, JIT_RA0 - (v->u.w - 8));
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_d(u, JIT_FP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -587,8 +628,10 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(JIT_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
        jit_movr_d(JIT_FA0 - v->u.w, u);
     else if (jit_arg_reg_p(v->u.w - 8))
        jit_movr_d_w(JIT_RA0 - (v->u.w - 8), u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
+    else {
+       jit_node_t      *node = jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -600,28 +643,24 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(JIT_FA0 - v->u.w, u);
     jit_inc_synth_dp(putargi_d, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi_d(JIT_FA0 - v->u.w, u);
-    else if (jit_arg_reg_p(v->u.w - 8)) {
-       union {
-           jit_float64_t       d;
-           jit_int64_t         w;
-       } uu;
-       uu.d = u;
-       jit_movi(JIT_RA0 - (v->u.w - 8), uu.w);
-    }
+    else if (jit_arg_reg_p(v->u.w - 8))
+       jit_movi_d_w(JIT_RA0 - (v->u.w - 8), u);
     else {
     else {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
+       node = jit_stxi_d(v->u.w, JIT_FP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
@@ -630,16 +669,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
@@ -651,6 +691,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -673,6 +714,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     else {
        jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -699,6 +741,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -721,6 +764,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     else {
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     else {
        jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -747,6 +791,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
        jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -775,6 +820,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -792,6 +838,7 @@ _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
     jit_node_t         *node;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
@@ -892,6 +939,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
        jit_node_t      *node;
        jit_uint8_t     *data;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1066,6 +1114,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1113,6 +1164,14 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+#define clor(r0, r1)   fallback_clo(r0, r1)
+#define clzr(r0, r1)   fallback_clz(r0, r1)
+#define ctor(r0, r1)   fallback_cto(r0, r1)
+#define ctzr(r0, r1)   fallback_ctz(r0, r1)
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1434,6 +1493,7 @@ _emit_code(jit_state_t *_jit)
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
                case_brr(bunord, _d);
                case_brd(bunord);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -1444,14 +1504,22 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (simm20_p(word))
+                           word = jmpi(_jit->pc.w);
+                       else
                        word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -1462,22 +1530,32 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (simm20_p(word))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    calli(node->u.w);
                    calli(node->u.w);
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
                _jitc->again = 0;
                prolog(node);
                break;
@@ -1493,10 +1571,25 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason for the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* this will be recomputed, but carry it over explicitly
+                    * anyway so that the code is self-documenting. */
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                /* remember label is defined */
                    goto restart_function;
                }
                /* remember label is defined */
@@ -1537,11 +1630,19 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+           case jit_code_arg_l:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1549,10 +1650,22 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_i:             case jit_code_getarg_ui:
            case jit_code_getarg_l:
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1659,6 +1772,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
 #define CODE                           1
 #  include "jit_riscv-cpu.c"
 #  include "jit_riscv-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 static void
 #undef CODE
 
 static void
@@ -1806,6 +1920,30 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     stxi_d(i0, rn(r0), rn(r1));
 }
 
     stxi_d(i0, rn(r0), rn(r1));
 }
 
+#if __WORDSIZE != 64
+# error "only 64 bit ports tested"
+#endif
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    _jitc->framesize = 16;     /* ra+fp */
+    for (reg = 0; reg < jit_size(iregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += sizeof(jit_word_t);
+
+    for (reg = 0; reg < jit_size(fregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+
+    /* Space to store variadic arguments */
+    if (_jitc->function->self.call & jit_call_varargs)
+       _jitc->framesize += (8 - _jitc->function->vagp) * 8;
+
+    /* Make sure functions called have a 16 byte aligned stack */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
index 55b7e1f..2e9e074 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -394,6 +394,8 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define EAR(R1,R2)                   RRE_(0xB24F,R1,R2)
 /* EXTRACT PSW */
 #  define EPSW(R1,R2)                  RRE_(0xB98D,R1,R2)
 #  define EAR(R1,R2)                   RRE_(0xB24F,R1,R2)
 /* EXTRACT PSW */
 #  define EPSW(R1,R2)                  RRE_(0xB98D,R1,R2)
+/* FIND LEFTMOST ONE */
+#  define FLOGR(R1,R2)                 RRE_(0xB983,R1,R2)
 /* INSERT CHARACTER */
 #  define IC(R1,D2,X2,B2)              RX_(0x43,R1,X2,B2,D2)
 #  define ICY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x73)
 /* INSERT CHARACTER */
 #  define IC(R1,D2,X2,B2)              RX_(0x43,R1,X2,B2,D2)
 #  define ICY(R1,D2,X2,B2)             RXY_(0xE3,R1,X2,B2,D2,0x73)
@@ -966,9 +968,14 @@ static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
 static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
 #  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
 static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
-#  define bswapr_us(r0, r1)            generic_bswapr_us(_jit, r0, r1)
-#  define bswapr_ui(r0, r1)            generic_bswapr_ui(_jit, r0, r1)
-#  define bswapr_ul(r0, r1)            generic_bswapr_ul(_jit, r0, r1)
+#  define bswapr_us(r0, r1)            _bswapr_us(_jit, r0, r1)
+static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define bswapr_ui(r0, r1)            _bswapr_ui(_jit, r0, r1)
+static void _bswapr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#define bswapr_ul(r0, r1)              _bswapr_ul(_jit, r0, r1)
+static void _bswapr_ul(jit_state_t*,jit_int32_t,jit_int32_t);
+#endif
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
 #  define movnr(r0,r1,r2)              _movnr(_jit,r0,r1,r2)
 static void _movnr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define movzr(r0,r1,r2)              _movzr(_jit,r0,r1,r2)
@@ -1051,32 +1058,39 @@ static void _qdivi_u(jit_state_t*,jit_int32_t,
 #  if __WORDSIZE == 32
 #    define lshr(r0,r1,r2)             _lshr(_jit,r0,r1,r2)
 static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  if __WORDSIZE == 32
 #    define lshr(r0,r1,r2)             _lshr(_jit,r0,r1,r2)
 static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#  else
-#    define lshr(r0,r1,r2)             SLLG(r0,r1,0,r2)
-#  endif
-#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
+#    define lshi(r0,r1,i0)             _lshi(_jit,r0,r1,i0)
 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  if __WORDSIZE == 32
 #    define rshr(r0,r1,r2)             _rshr(_jit,r0,r1,r2)
 static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #    define rshr(r0,r1,r2)             _rshr(_jit,r0,r1,r2)
 static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#  else
-#    define rshr(r0,r1,r2)             SRAG(r0,r1,0,r2)
-#  endif
-#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
+#    define rshi(r0,r1,i0)             _rshi(_jit,r0,r1,i0)
 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  if __WORDSIZE == 32
 #    define rshr_u(r0,r1,r2)           _rshr_u(_jit,r0,r1,r2)
 static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #    define rshr_u(r0,r1,r2)           _rshr_u(_jit,r0,r1,r2)
 static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  else
 #  else
+#    define lshr(r0,r1,r2)             SLLG(r0,r1,0,r2)
+#    define lshi(r0,r1,i0)             SLLG(r0,r1,i0,0)
+#    define rshr(r0,r1,r2)             SRAG(r0,r1,0,r2)
+#    define rshi(r0,r1,i0)             SRAG(r0,r1,i0,0)
 #    define rshr_u(r0,r1,r2)           SRLG(r0,r1,0,r2)
 #    define rshr_u(r0,r1,r2)           SRLG(r0,r1,0,r2)
+#    define rshi_u(r0,r1,i0)           SRLG(r0,r1,i0,0)
 #  endif
 #  endif
-#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
-static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  if __WORDSIZE == 32
 #    define negr(r0,r1)                        LCR(r0,r1)
 #  else
 #    define negr(r0,r1)                        LCGR(r0,r1)
 #  endif
 #  if __WORDSIZE == 32
 #    define negr(r0,r1)                        LCR(r0,r1)
 #  else
 #    define negr(r0,r1)                        LCGR(r0,r1)
 #  endif
+#  define bitswap(r0, r1)              _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define comr(r0,r1)                  _comr(_jit,r0,r1)
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2)
 #  define comr(r0,r1)                  _comr(_jit,r0,r1)
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2)
@@ -1289,13 +1303,13 @@ static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define bmci(i0,r0,i1)               bmxi(CC_E,i0,r0,i1)
 #  define bmci_p(i0,r0,i1)             bmxi_p(CC_E,i0,r0,i1)
 #  define jmpr(r0)                     BR(r0)
 #  define bmci(i0,r0,i1)               bmxi(CC_E,i0,r0,i1)
 #  define bmci_p(i0,r0,i1)             bmxi_p(CC_E,i0,r0,i1)
 #  define jmpr(r0)                     BR(r0)
-#  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+#  define jmpi(i0,i1)                  _jmpi(_jit,i0,i1)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t, jit_bool_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    BALR(_R14_REGNO,r0)
 #  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    BALR(_R14_REGNO,r0)
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
+#  define calli(i0,i1)                 _calli(_jit,i0,i1)
+static jit_word_t _calli(jit_state_t*,jit_word_t, jit_bool_t);
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
 #  define calli_p(i0)                  _calli_p(_jit,i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(i0)                   _prolog(_jit,i0)
@@ -2473,6 +2487,31 @@ _movzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     patch_at(w, _jit->pc.w);
 }
 
     patch_at(w, _jit->pc.w);
 }
 
+/*
+ * Emit code that stores in r0 the byte-swapped low 16 bits of r1,
+ * zero extended.
+ */
+static void
+_bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LOAD REVERSED: r0 = low 32 bits of r1 with the byte order reversed */
+    LRVR(r0, r1);
+    /* the swapped halfword is now in the upper 16 bits; shift it down */
+    SRL(r0, 16, 0);
+    /* LOAD LOGICAL HALFWORD: keep only the low 16 bits, zero extended */
+    LLGHR(r0, r0);
+}
+
+/*
+ * Emit code that stores in r0 the byte-swapped low 32 bits of r1.
+ * On 64-bit builds the result is additionally zero extended.
+ */
+static void
+_bswapr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LOAD REVERSED: reverse the byte order of the 32-bit value */
+    LRVR(r0, r1);
+#  if __WORDSIZE == 64
+    /* LOAD LOGICAL (64<-32): clear the upper half of the register */
+    LLGFR(r0, r0);
+#  endif
+}
+
+#if __WORDSIZE == 64
+/*
+ * Emit code that stores in r0 the byte-swapped 64-bit value of r1.
+ * Only built when the word size is 64 bits.
+ */
+static void
+_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* LOAD REVERSED (64-bit form) */
+    LRVGR(r0, r1);
+}
+#endif
+
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
@@ -2897,19 +2936,14 @@ _lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        SLL(r0, 0, r2);
     }
 }
        SLL(r0, 0, r2);
     }
 }
-#endif
 
 static void
 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 
 static void
 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg_but_zero(0);
-    movi(rn(reg), i0);
-    lshr(r0, r1, rn(reg));
-    jit_unget_reg_but_zero(reg);
+    movr(r0, r1);
+    SLL(r0, i0, 0);
 }
 
 }
 
-#  if __WORDSIZE == 32
 static void
 _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2926,19 +2960,14 @@ _rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        SRA(r0, 0, r2);
     }
 }
        SRA(r0, 0, r2);
     }
 }
-#endif
 
 static void
 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 
 static void
 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg_but_zero(0);
-    movi(rn(reg), i0);
-    rshr(r0, r1, rn(reg));
-    jit_unget_reg_but_zero(reg);
+    movr(r0, r1);
+    SRA(r0, i0, 0);
 }
 
 }
 
-#  if __WORDSIZE == 32
 static void
 _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -2955,16 +2984,141 @@ _rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        SRL(r0, 0, r2);
     }
 }
        SRL(r0, 0, r2);
     }
 }
-#endif
 
 static void
 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 
 static void
 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg_but_zero(0);
-    movi(rn(reg), i0);
-    rshr_u(r0, r1, rn(reg));
-    jit_unget_reg_but_zero(reg);
+    movr(r0, r1);
+    SRL(r0, i0, 0);
+}
+#endif
+
+/*
+ * Emit code that stores in r0 the value of r1 with its bit order
+ * reversed over the whole word (32 or 64 bits, per __WORDSIZE).
+ *
+ * The reversal is done by swapping progressively larger groups:
+ * adjacent bits, bit pairs, nibbles, bytes, halfwords and, on 64-bit
+ * builds, words.  Each masked step computes
+ *     v = ((v >> n) & mask) | ((v & mask) << n)
+ * with t0 holding the mask and t1/t2 the two halves.  The final step
+ * needs no mask because the shifts discard the unwanted bits.
+ */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    /* swap adjacent bits */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* swap bit pairs */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* swap nibbles */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* swap bytes */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    /* swap halfwords; no mask needed for the last step */
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);              /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  else
+    /* swap halfwords */
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    /* swap words; no mask needed for the last step */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  endif
+    /* release the temporaries in reverse order of acquisition */
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/*
+ * Emit code that stores in r0 the number of leading one bits of r1.
+ * Implemented as the leading-zero count of the complement of r1.
+ * When CHECK_FLOGR is enabled and jit_cpu.flogr is zero,
+ * fallback_clo() is used instead.
+ */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+       comr(r0, r1);           /* r0 = ~r1 */
+       clzr(r0, r0);           /* leading zeros of ~r1 == leading ones of r1 */
+#if CHECK_FLOGR
+    }
+    else
+       fallback_clo(r0, r1);
+#endif
+}
+
+/*
+ * Emit code that stores in r0 the number of leading zero bits of r1,
+ * using FLOGR (FIND LEFTMOST ONE) on a scratch register pair.
+ * When CHECK_FLOGR is enabled and jit_cpu.flogr is zero,
+ * fallback_clz() is used instead.
+ */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+#if __WORDSIZE == 32
+       jit_word_t              w;
+#endif
+       jit_int32_t             regno;
+       /* a register pair is requested for FLOGR; presumably because the
+        * instruction writes an even/odd pair -- confirm against the
+        * z/Architecture description of FLOGR */
+       regno = jit_get_reg_pair();
+#if __WORDSIZE == 32
+       /* place the 32-bit operand in the upper half of the 64-bit
+        * scratch register so the count is relative to 32 bits */
+       SLLG(rn(regno), r1, 32, 0);
+#else
+       movr(rn(regno), r1);
+#endif
+       FLOGR(rn(regno), rn(regno));
+       movr(r0, rn(regno));
+#if __WORDSIZE == 32
+       /* counts up to 31 are already correct: branch over the fixup.
+        * Otherwise the operand was zero and the count is 64, which the
+        * shift below halves to 32 */
+       w = blei_p(_jit->pc.w, r0, 31);
+       rshi(r0, r0, 1);        /* r0 is 64 */
+       patch_at(w, _jit->pc.w);
+#endif
+       jit_unget_reg_pair(regno);
+#if CHECK_FLOGR
+    }
+    else
+       fallback_clz(r0, r1);
+#endif
+}
+
+/*
+ * Emit code that stores in r0 the number of trailing one bits of r1.
+ * Implemented as the leading-one count of the bit-reversed value.
+ * When CHECK_FLOGR is enabled and jit_cpu.flogr is zero,
+ * fallback_cto() is used instead.
+ */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+       bitswap(r0, r1);        /* r0 = r1 with bit order reversed */
+       clor(r0, r0);           /* trailing ones became leading ones */
+#if CHECK_FLOGR
+    }
+    else
+       fallback_cto(r0, r1);
+#endif
+}
+
+/*
+ * Emit code that stores in r0 the number of trailing zero bits of r1.
+ * Implemented as the leading-zero count of the bit-reversed value.
+ * When CHECK_FLOGR is enabled and jit_cpu.flogr is zero,
+ * fallback_ctz() is used instead.
+ */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+#if CHECK_FLOGR
+    if (jit_cpu.flogr) {
+#endif
+       bitswap(r0, r1);        /* r0 = r1 with bit order reversed */
+       clzr(r0, r0);           /* trailing zeros became leading zeros */
+#if CHECK_FLOGR
+    }
+    else
+       fallback_ctz(r0, r1);
+#endif
 }
 
 static void
 }
 
 static void
@@ -3497,13 +3651,14 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
 }
 #endif
 
-static void
-_jmpi(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
 {
 {
-    jit_word_t         d;
     jit_int32_t                reg;
     jit_int32_t                reg;
-    d = (i0 - _jit->pc.w) >> 1;
-    if (s16_p(d))
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 1;
+    if (i1 && s16_p(d))
        J(x16(d));
     else if (s32_p(d))
        BRL(d);
        J(x16(d));
     else if (s32_p(d))
        BRL(d);
@@ -3513,6 +3668,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
        jmpr(rn(reg));
        jit_unget_reg_but_zero(reg);
     }
        jmpr(rn(reg));
        jit_unget_reg_but_zero(reg);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3527,13 +3683,16 @@ _jmpi_p(jit_state_t *_jit, jit_word_t i0)
     return (w);
 }
 
     return (w);
 }
 
-static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+static jit_word_t
+_calli(jit_state_t *_jit, jit_word_t i0, jit_bool_t i1)
 {
 {
-    jit_word_t         d;
     jit_int32_t                reg;
     jit_int32_t                reg;
-    d = (i0 - _jit->pc.w) >> 1;
-    if (s32_p(d))
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 1;
+    if (i1 && s16_p(d))
+       BRAS(_R14_REGNO, x16(d));
+    else if (s32_p(d))
        BRASL(_R14_REGNO, d);
     else {
        reg = jit_get_reg_but_zero(0);
        BRASL(_R14_REGNO, d);
     else {
        reg = jit_get_reg_but_zero(0);
@@ -3541,6 +3700,7 @@ _calli(jit_state_t *_jit, jit_word_t i0)
        callr(rn(reg));
        jit_unget_reg_but_zero(reg);
     }
        callr(rn(reg));
        jit_unget_reg_but_zero(reg);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3889,17 +4049,17 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        u.s[7] = i1.s;
 #endif
     }
        u.s[7] = i1.s;
 #endif
     }
-    /* BRC */
+    /* BRC or BRAS */
     else if (i0.b.op == 0xA7) {
     else if (i0.b.op == 0xA7) {
-       assert(i0.b.r3 == 0x4);
+       assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
        d = (label - instr) >> 1;
        assert(s16_p(d));
        i1.b.i2 = d;
        u.s[1] = i1.s;
     }
        d = (label - instr) >> 1;
        assert(s16_p(d));
        i1.b.i2 = d;
        u.s[1] = i1.s;
     }
-    /* BRCL */
+    /* BRCL or BRASL */
     else if (i0.b.op == 0xC0) {
     else if (i0.b.op == 0xC0) {
-       assert(i0.b.r3 == 0x4);
+       assert(i0.b.r3 == 0x4 || i0.b.r3 == 0x5);
        d = (label - instr) >> 1;
        assert(s32_p(d));
        i12.i = d;
        d = (label - instr) >> 1;
        assert(s32_p(d));
        i12.i = d;
index edf9ddd..6c3c4ac 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
index e70c65f..ee30447 100644 (file)
@@ -1,11 +1,11 @@
-
 #if __WORDSIZE == 32
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 94
+#define JIT_INSTR_MAX 164
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    2, /* align */
+    4, /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     2, /* label */
     0, /* #name */
     0, /* #note */
     2, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     40,        /* va_start */
     40,        /* va_start */
-    86,        /* va_arg */
-    82,        /* va_arg_d */
+    82,        /* va_arg */
+    78,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
@@ -43,8 +58,8 @@
     14,        /* rsbi */
     6, /* mulr */
     14,        /* muli */
     14,        /* rsbi */
     6, /* mulr */
     14,        /* muli */
-    46,        /* qmulr */
-    50,        /* qmuli */
+    38,        /* qmulr */
+    42,        /* qmuli */
     10,        /* qmulr_u */
     18,        /* qmuli_u */
     10,        /* divr */
     10,        /* qmulr_u */
     18,        /* qmuli_u */
     10,        /* divr */
     4, /* xorr */
     12,        /* xori */
     8, /* lshr */
     4, /* xorr */
     12,        /* xori */
     8, /* lshr */
-    10,        /* lshi */
+    6, /* lshi */
     8, /* rshr */
     8, /* rshr */
-    10,        /* rshi */
+    6, /* rshi */
     8, /* rshr_u */
     8, /* rshr_u */
-    10,        /* rshi_u */
+    6, /* rshi_u */
     2, /* negr */
     8, /* comr */
     16,        /* ltr */
     2, /* negr */
     8, /* comr */
     16,        /* ltr */
     8, /* movi */
     14,        /* movnr */
     14,        /* movzr */
     8, /* movi */
     14,        /* movnr */
     14,        /* movzr */
+    22,        /* casr */
+    28,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    12,        /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
     4, /* htonr_us */
     2, /* htonr_ui */
     0, /* htonr_ul */
     4, /* htonr_us */
     2, /* htonr_ui */
     0, /* htonr_ul */
     8, /* bxsubr_u */
     12,        /* bxsubi_u */
     2, /* jmpr */
     8, /* bxsubr_u */
     12,        /* bxsubi_u */
     2, /* jmpr */
-    10,        /* jmpi */
+    6, /* jmpi */
     2, /* callr */
     2, /* callr */
-    10,        /* calli */
+    6, /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    38,        /* bswapr_us */
-    94,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    22,        /* casr */
-    28,        /* casi */
+    36,        /* clo */
+    28,        /* clz */
+    164,       /* cto */
+    158,       /* ctz */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 300
+#define JIT_INSTR_MAX 280
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    6, /* align */
+    20,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     2, /* label */
     0, /* #name */
     0, /* #note */
     2, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     44,        /* va_start */
     44,        /* va_start */
-    104,       /* va_arg */
-    100,       /* va_arg_d */
+    100,       /* va_arg */
+    96,        /* va_arg_d */
     0, /* va_end */
     8, /* addr */
     24,        /* addi */
     0, /* va_end */
     8, /* addr */
     24,        /* addi */
     28,        /* rsbi */
     8, /* mulr */
     24,        /* muli */
     28,        /* rsbi */
     8, /* mulr */
     24,        /* muli */
-    60,        /* qmulr */
-    68,        /* qmuli */
+    52,        /* qmulr */
+    60,        /* qmuli */
     16,        /* qmulr_u */
     32,        /* qmuli_u */
     12,        /* divr */
     16,        /* qmulr_u */
     32,        /* qmuli_u */
     12,        /* divr */
     8, /* xorr */
     24,        /* xori */
     6, /* lshr */
     8, /* xorr */
     24,        /* xori */
     6, /* lshr */
-    10,        /* lshi */
+    6, /* lshi */
     6, /* rshr */
     6, /* rshr */
-    10,        /* rshi */
+    6, /* rshi */
     6, /* rshr_u */
     6, /* rshr_u */
-    10,        /* rshi_u */
+    6, /* rshi_u */
     4, /* negr */
     12,        /* comr */
     20,        /* ltr */
     4, /* negr */
     12,        /* comr */
     20,        /* ltr */
     16,        /* movi */
     18,        /* movnr */
     18,        /* movzr */
     16,        /* movi */
     18,        /* movnr */
     18,        /* movzr */
+    30,        /* casr */
+    42,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     4, /* extr_i */
     4, /* extr_ui */
+    12,        /* bswapr_us */
+    8, /* bswapr_ui */
+    4, /* bswapr_ul */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     6, /* ldr_c */
     18,        /* ldi_c */
     6, /* ldr_uc */
     4, /* htonr_us */
     4, /* htonr_ui */
     4, /* htonr_ul */
     6, /* ldr_c */
     18,        /* ldi_c */
     6, /* ldr_uc */
-    18,        /* ldi_uc */
+    22,        /* ldi_uc */
     6, /* ldr_s */
     18,        /* ldi_s */
     6, /* ldr_us */
     6, /* ldr_s */
     18,        /* ldi_s */
     6, /* ldr_us */
     14,        /* ldxr_l */
     26,        /* ldxi_l */
     4, /* str_c */
     14,        /* ldxr_l */
     26,        /* ldxi_l */
     4, /* str_c */
-    16,        /* sti_c */
+    20,        /* sti_c */
     4, /* str_s */
     16,        /* sti_s */
     4, /* str_i */
     4, /* str_s */
     16,        /* sti_s */
     4, /* str_i */
     10,        /* bxsubr_u */
     14,        /* bxsubi_u */
     2, /* jmpr */
     10,        /* bxsubr_u */
     14,        /* bxsubi_u */
     2, /* jmpr */
-    18,        /* jmpi */
+    6, /* jmpi */
     2, /* callr */
     2, /* callr */
-    18,        /* calli */
+    14,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    60,        /* bswapr_us */
-    140,       /* bswapr_ui */
-    300,       /* bswapr_ul */
-    30,        /* casr */
-    42,        /* casi */
+    24,        /* clo */
+    12,        /* clz */
+    280,       /* cto */
+    272,       /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index 30ab760..6934b11 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
  * Authors:
  *     Paulo Cesar Pereira de Andrade
  */
  * Authors:
  *     Paulo Cesar Pereira de Andrade
  */
+#define CHECK_FLOGR    0
+
+#if CHECK_FLOGR
+#include <signal.h>
+#include <setjmp.h>
+#endif
 
 #include <lightning.h>
 #include <lightning/jit_private.h>
 
 #include <lightning.h>
 #include <lightning/jit_private.h>
@@ -88,11 +94,15 @@ extern void __clear_cache(void *, void *);
 #define PROTO                          1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
 #define PROTO                          1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
+#  if CHECK_FLOGR
+#    include "jit_fallback.c"
+#  endif
 #undef PROTO
 
 /*
  * Initialization
  */
 #undef PROTO
 
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x0,                   "%r0" },
     { rc(gpr) | 0x1,                   "%r1" },
 jit_register_t         _rvs[] = {
     { rc(gpr) | 0x0,                   "%r0" },
     { rc(gpr) | 0x1,                   "%r1" },
@@ -129,13 +139,48 @@ jit_register_t            _rvs[] = {
     { rc(fpr) | rc(arg) | 0x0,         "%f0" },
     { _NOREG,                          "<none>" },
 };
     { rc(fpr) | rc(arg) | 0x0,         "%f0" },
     { _NOREG,                          "<none>" },
 };
+#if CHECK_FLOGR
+static sigjmp_buf      jit_env;
+#endif
 
 /*
  * Implementation
  */
 
 /*
  * Implementation
  */
+#if CHECK_FLOGR
+/*
+ * SIGILL handler used only while jit_get_cpu() probes for FLOGR:
+ * records that the instruction is unavailable and resumes at the
+ * sigsetjmp() point saved in jit_env.  Does not return.
+ */
+static void
+sigill_handler(int signum)
+{
+    jit_cpu.flogr = 0;
+    siglongjmp(jit_env, 1);
+}
+#endif
+
 void
 jit_get_cpu(void)
 {
 void
 jit_get_cpu(void)
 {
+#if CHECK_FLOGR
+    /* Probe for FLOGR by executing it once with a temporary SIGILL
+     * handler installed.  If the instruction traps, sigill_handler()
+     * clears jit_cpu.flogr and jumps back to the sigsetjmp() below.
+     * If SIGILL is currently ignored the probe is skipped and
+     * jit_cpu.flogr is left untouched. */
+    int                        r12, r13;
+    struct             sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    if (old_action.sa_handler != SIG_IGN) {
+       sigaction(SIGILL, &new_action, NULL);
+       if (!sigsetjmp(jit_env, 1)) {
+           jit_cpu.flogr = 1;
+           /* flogr %r12, %r12 */
+           /* NOTE(review): both operands are declared as outputs ("=r")
+            * but are read by the first two lgr, and %r12/%r13 are not
+            * listed as clobbers -- confirm the intended save/restore. */
+           __asm__ volatile("lgr %%r12, %0; lgr %%r13, %1;"
+                            "flogr %%r12, %%r12;"
+                            "lgr %1, %%r13; lgr %0, %%r12;"
+                            : "=r" (r12), "=r" (r13));
+       }
+       /* Restore the previous SIGILL disposition on both paths: the
+        * normal one and the one reached through siglongjmp() when the
+        * probe trapped, so the probe handler never stays installed. */
+       sigaction(SIGILL, &old_action, NULL);
+    }
+#else
+    /* By default, assume it is available */
+    jit_cpu.flogr = 1;
+#endif
 }
 
 void
 }
 
 void
@@ -240,18 +285,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
+    jit_code_inc_synth_w(code, u);
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -305,7 +350,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -352,18 +397,22 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     }
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_word_t);
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -408,7 +457,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _R2 - v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _R2 - v->u.w);
@@ -421,7 +470,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _R2 - v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _R2 - v->u.w);
@@ -434,7 +483,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _R2 - v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _R2 - v->u.w);
@@ -447,7 +496,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _R2 - v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _R2 - v->u.w);
@@ -460,7 +509,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 32
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #if __WORDSIZE == 32
@@ -479,7 +528,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _R2 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _R2 - v->u.w);
@@ -492,7 +541,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _R2 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _R2 - v->u.w);
@@ -503,10 +552,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 #endif
 
 void
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_R2 - v->u.w, u);
     else
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_R2 - v->u.w, u);
     else
@@ -515,11 +564,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_R2 - v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_R2 - v->u.w, u);
     else {
@@ -627,10 +676,10 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_R2 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_R2 - _jitc->function->call.argi, u);
@@ -644,11 +693,11 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_R2 - _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_R2 - _jitc->function->call.argi, u);
@@ -890,6 +939,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1032,6 +1082,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 1) & ~1);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1082,6 +1135,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
@@ -1427,14 +1484,21 @@ _emit_code(jit_state_t *_jit)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       jmpi(temp->u.w);
+                       jmpi(temp->u.w, 1);
                    else {
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s32_p(word)) {
+                           offset = s16_p(word);
+                           word = jmpi(_jit->pc.w, offset);
+                       }
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                else
                        patch(word, node);
                    }
                }
                else
-                   jmpi(node->u.w);
+                   jmpi(node->u.w, 1);
                break;
            case jit_code_callr:
                callr(rn(node->u.w));
                break;
            case jit_code_callr:
                callr(rn(node->u.w));
@@ -1445,19 +1509,27 @@ _emit_code(jit_state_t *_jit)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    if (temp->flag & jit_flag_patch)
-                       calli(temp->u.w);
+                       calli(temp->u.w, 1);
                    else {
                    else {
-                       word = calli_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s32_p(word)) {
+                           offset =s16_p(word);
+                           word = calli(_jit->pc.w, offset);
+                       }
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                else
                        patch(word, node);
                    }
                }
                else
-                   calli(node->u.w);
+                   calli(node->u.w, 1);
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1478,6 +1550,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1504,11 +1586,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1518,10 +1612,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1565,6 +1675,9 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
 #define CODE                           1
 #  include "jit_s390-cpu.c"
 #  include "jit_s390-fpu.c"
+#  if CHECK_FLOGR
+#    include "jit_fallback.c"
+#  endif
 #undef CODE
 
 void
 #undef CODE
 
 void
index b3e1cae..143a5d9 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -28,7 +28,7 @@
  */
 static jit_int16_t     _szs[jit_code_last_code] = {
 #if GET_JIT_SIZE
  */
 static jit_int16_t     _szs[jit_code_last_code] = {
 #if GET_JIT_SIZE
-#  define JIT_INSTR_MAX                512
+#  define JIT_INSTR_MAX                1024
 #else
 #  if defined(__i386__) || defined(__x86_64__)
 #    include "jit_x86-sz.c"
 #else
 #  if defined(__i386__) || defined(__x86_64__)
 #    include "jit_x86-sz.c"
@@ -121,7 +121,15 @@ _jit_get_size(jit_state_t *_jit)
                break;
        }
 #  endif
                break;
        }
 #  endif
-       size += _szs[node->code];
+       switch (node->code) {
+           /* The instructions are special because they can be arbitrarily long.  */
+           case jit_code_align:
+           case jit_code_skip:
+               size += node->u.w;
+               break;
+           default:
+               size += _szs[node->code];
+       }
     }
 #  if __riscv && __WORDSIZE == 64
     /* Heuristically only 20% of constants are unique. */
     }
 #  if __riscv && __WORDSIZE == 64
     /* Heuristically only 20% of constants are unique. */
@@ -143,7 +151,7 @@ jit_finish_size(void)
 {
 #if GET_JIT_SIZE
     FILE               *fp;
 {
 #if GET_JIT_SIZE
     FILE               *fp;
-    jit_word_t          offset;
+    int                         offset;
 
     /* Define a single path */
     fp = fopen(JIT_SIZE_PATH, "a");
 
     /* Define a single path */
     fp = fopen(JIT_SIZE_PATH, "a");
index 86eb05e..f4ce621 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -120,6 +120,11 @@ static void _f3t(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
 static void _f3a(jit_state_t*,jit_int32_t,
                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
     maybe_unused;
 static void _f3a(jit_state_t*,jit_int32_t,
                 jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
     maybe_unused;
+#  define f2c1(op,rd,op3,rs1,opf,rs2)  _f2c1(_jit,op,rd,op3,rs1,opf,rs2)
+static void
+_f2c1(jit_state_t*,jit_int32_t, jit_int32_t,
+      jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t)
+    maybe_unused;
 #  define LDSB(rs1, rs2, rd)           f3r(3, rd, 9, rs1, rs2)
 #  define LDSBI(rs1, imm, rd)          f3i(3, rd, 9, rs1, imm)
 #  define LDSH(rs1, rs2, rd)           f3r(3, rd, 10, rs1, rs2)
 #  define LDSB(rs1, rs2, rd)           f3r(3, rd, 9, rs1, rs2)
 #  define LDSBI(rs1, imm, rd)          f3i(3, rd, 9, rs1, imm)
 #  define LDSH(rs1, rs2, rd)           f3r(3, rd, 10, rs1, rs2)
@@ -545,6 +550,7 @@ static void _f3a(jit_state_t*,jit_int32_t,
 #  define UNIMP(imm)                   f2r(0, 0, 0, imm)
 #  define FLUSH(rs1, rs2)              f3r(2, 0, 59, rs1, rs2)
 #  define FLUSHI(rs1, im)              f3i(2, 0, 59, rs1, imm)
 #  define UNIMP(imm)                   f2r(0, 0, 0, imm)
 #  define FLUSH(rs1, rs2)              f3r(2, 0, 59, rs1, rs2)
 #  define FLUSHI(rs1, im)              f3i(2, 0, 59, rs1, imm)
+#  define LZCNT(rs2, rd)               f2c1(2, rd, 54, 0, 23, rs2)
 #  define nop(i0)                      _nop(_jit, i0)
 static void _nop(jit_state_t*, jit_int32_t);
 #  define movr(r0, r1)                 _movr(_jit, r0, r1)
 #  define nop(i0)                      _nop(_jit, i0)
 static void _nop(jit_state_t*, jit_int32_t);
 #  define movr(r0, r1)                 _movr(_jit, r0, r1)
@@ -567,6 +573,16 @@ static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define comr(r0, r1)                 XNOR(r1, 0, r0)
 #  define negr(r0, r1)                 NEG(r1, r0)
 #define casi(r0, i0, r1, r2)           casx(r0, _NOREG, r1, r2, i0)
 #  define comr(r0, r1)                 XNOR(r1, 0, r0)
 #  define negr(r0, r1)                 NEG(r1, r0)
+#  define bitswap(r0, r1)              _bitswap(_jit, r0, r1)
+static void _bitswap(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define addr(r0, r1, r2)             ADD(r1, r2, r0)
 #  define addi(r0, r1, i0)             _addi(_jit, r0, r1, i0)
 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define addr(r0, r1, r2)             ADD(r1, r2, r0)
 #  define addi(r0, r1, i0)             _addi(_jit, r0, r1, i0)
 static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
@@ -941,13 +957,13 @@ _bm_w(jit_state_t*,jit_bool_t,jit_word_t,jit_int32_t,jit_word_t);
 #  define jmpr(r0)                     _jmpr(_jit, r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #  define jmpi(i0)                     _jmpi(_jit, i0)
 #  define jmpr(r0)                     _jmpr(_jit, r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #  define jmpi(i0)                     _jmpi(_jit, i0)
-static void _jmpi(jit_state_t*,jit_word_t);
+static jit_word_t _jmpi(jit_state_t*,jit_word_t);
 #  define jmpi_p(i0)                   _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    _callr(_jit, r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #  define calli(i0)                    _calli(_jit, i0)
 #  define jmpi_p(i0)                   _jmpi_p(_jit, i0)
 static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
 #  define callr(r0)                    _callr(_jit, r0)
 static void _callr(jit_state_t*,jit_int32_t);
 #  define calli(i0)                    _calli(_jit, i0)
-static void _calli(jit_state_t*,jit_word_t);
+static jit_word_t _calli(jit_state_t*,jit_word_t);
 #  define calli_p(i0)                  _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit, node)
 #  define calli_p(i0)                  _calli_p(_jit, i0)
 static jit_word_t _calli_p(jit_state_t*,jit_word_t);
 #  define prolog(node)                 _prolog(_jit, node)
@@ -1182,6 +1198,26 @@ _f1(jit_state_t *_jit, jit_int32_t op, jit_int32_t disp30)
     ii(v.v);
 }
 
     ii(v.v);
 }
 
+/* Pack op, rd, op3, rs1, opf and rs2 into one jit_instr_t and emit it
+ * with ii().  Each argument is first checked against the width of the
+ * field it is stored in.  Used by the LZCNT() macro. */
+static void
+_f2c1(jit_state_t *_jit, jit_int32_t op, jit_int32_t rd,
+      jit_int32_t op3, jit_int32_t rs1, jit_int32_t opf, jit_int32_t rs2)
+{
+    jit_instr_t                v;
+    assert(!(op  & 0xfffffffc));
+    assert(!(rd  & 0xffffffe0));
+    /* The checked value is the op3 argument; no "res" exists here. */
+    assert(!(op3 & 0xffffffc0));
+    assert(!(rs1 & 0xffffffe0));
+    assert(!(opf & 0xfffffe00));
+    /* rs2 is a register number, so it gets the same mask as rd and rs1. */
+    assert(!(rs2 & 0xffffffe0));
+    v.op.b = op;
+    v.rd.b = rd;
+    v.op3.b = op3;
+    v.rs1.b = rs1;
+    v.opf.b = opf;
+    v.rs2.b = rs2;
+    ii(v.v);
+}
+
 static void
 _nop(jit_state_t *_jit, jit_int32_t i0)
 {
 static void
 _nop(jit_state_t *_jit, jit_int32_t i0)
 {
@@ -1296,6 +1332,111 @@ _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
         jit_unget_reg(r1_reg);
 }
 
         jit_unget_reg(r1_reg);
 }
 
+/* Store in r0 the value of r1 with its bit order reversed.
+ * The value is copied to r0, then neighbouring groups of 1, 2, 4 and 8
+ * bits are exchanged using a mask loaded in a scratch register, and
+ * finally the 16 bit (and, for 64 bit words, 32 bit) halves are swapped.
+ * Three scratch general purpose registers are acquired and released. */
+static void
+_bitswap(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                t0, t1, t2;
+    movr(r0, r1);
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    movi(rn(t0), __WORDSIZE == 32 ? 0x55555555L : 0x5555555555555555L);
+    rshi_u(rn(t1), r0, 1);             /* t1 = v >> 1 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 1);           /* t2 <<= 1 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x33333333L : 0x3333333333333333L);
+    rshi_u(rn(t1), r0, 2);             /* t1 = v >> 2 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 2);           /* t2 <<= 2 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ? 0x0f0f0f0fL : 0x0f0f0f0f0f0f0f0fL);
+    rshi_u(rn(t1), r0, 4);             /* t1 = v >> 4 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 4);           /* t2 <<= 4 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    movi(rn(t0), __WORDSIZE == 32 ?  0x00ff00ffL : 0x00ff00ff00ff00ffL);
+    rshi_u(rn(t1), r0, 8);             /* t1 = v >> 8 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 8);           /* t2 <<= 8 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  if __WORDSIZE == 32
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    lshi(rn(t2), r0, 16);              /* t2 = v << 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  else
+    movi(rn(t0), 0x0000ffff0000ffffL);
+    rshi_u(rn(t1), r0, 16);            /* t1 = v >> 16 */
+    andr(rn(t1), rn(t1), rn(t0));      /* t1 &= t0 */
+    andr(rn(t2), r0, rn(t0));          /* t2 = v & t0*/
+    lshi(rn(t2), rn(t2), 16);          /* t2 <<= 16 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+    rshi_u(rn(t1), r0, 32);            /* t1 = v >> 32 */
+    lshi(rn(t2), r0, 32);              /* t2 = v << 32 */
+    orr(r0, rn(t1), rn(t2));           /* v = t1 | t2 */
+#  endif
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/* r0 = clz(~r1) when jit_cpu.lzcnt is set; otherwise defer to
+ * fallback_clo(). */
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+       fallback_clo(r0, r1);
+       return;
+    }
+    /* Complement first, then count the leading zeros of the result. */
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+/* Count leading zeros of r1 into r0, using LZCNT() when jit_cpu.lzcnt
+ * is set and fallback_clz() otherwise. */
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (jit_cpu.lzcnt) {
+#if __WORDSIZE == 32
+       jit_word_t              w;
+       /* Shift the value into the upper 32 bits before counting;
+        * presumably LZCNT counts over the whole 64 bit register. */
+       SLLXI(r1, 32, r0);
+       LZCNT(r0, r0);
+       /* Results up to 31 are kept as is; skip the adjustment. */
+       w = blei(_jit->pc.w, r0, 31);
+       rshi(r0, r0, 1);        /* r0 is 64 */
+       patch_at(w, _jit->pc.w);
+#else
+       LZCNT(r1, r0);
+#endif
+    }
+    else
+       fallback_clz(r0, r1);
+}
+
+/* r0 = clo(bitswap(r1)) when jit_cpu.lzcnt is set; otherwise defer to
+ * fallback_cto(). */
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+       fallback_cto(r0, r1);
+       return;
+    }
+    /* Reverse the bit order, then run the leading ones count on it. */
+    bitswap(r0, r1);
+    clor(r0, r0);
+}
+
+/* r0 = clz(bitswap(r1)) when jit_cpu.lzcnt is set; otherwise defer to
+ * fallback_ctz(). */
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    if (!jit_cpu.lzcnt) {
+       fallback_ctz(r0, r1);
+       return;
+    }
+    /* Reverse the bit order, then run the leading zeros count on it. */
+    bitswap(r0, r1);
+    clzr(r0, r0);
+}
+
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
 static void
 _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2310,7 +2451,7 @@ _bw(jit_state_t *_jit, jit_int32_t cc,
 #  if __WORDSIZE == 32
        B(cc, (i0 - w) >> 2);
 #  else
 #  if __WORDSIZE == 32
        B(cc, (i0 - w) >> 2);
 #  else
-       B(cc, (i0 - w) >> 2);
+       BP(cc, (i0 - w) >> 2);
 #  endif
        NOP();
     }
 #  endif
        NOP();
     }
@@ -2430,14 +2571,15 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
     NOP();
 }
 
     NOP();
 }
 
-static void
+static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
     jit_int32_t                reg;
     jit_int32_t                reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (s22_p(w)) {
-       BA(w);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (s22_p(d)) {
+       BA(d);
        NOP();
     }
     else {
        NOP();
     }
     else {
@@ -2446,6 +2588,7 @@ _jmpi(jit_state_t *_jit, jit_word_t i0)
        jmpr(rn(reg));
        jit_unget_reg(reg);
     }
        jmpr(rn(reg));
        jit_unget_reg(reg);
     }
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2467,17 +2610,19 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
     NOP();
 }
 
     NOP();
 }
 
-static void
+static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         w;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (s30_p(w)) {
-       CALLI(w);
+    jit_word_t         d, w;
+    w = _jit->pc.w;
+    d = (i0 - w) >> 2;
+    if (s30_p(d)) {
+       CALLI(d);
        NOP();
     }
     else
        NOP();
     }
     else
-       (void)calli_p(i0);
+       w = calli_p(i0);
+    return (w);
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -2551,24 +2696,24 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
 {
     if (_jitc->function->assume_frame)
        return;
 {
     if (_jitc->function->assume_frame)
        return;
-    /* (most) other backends do not save incoming arguments, so,
-     * only save locals here */
+    if (_jitc->function->allocar)
+       subi(_SP_REGNO, _FP_REGNO, _jitc->function->stack);
     if (jit_regset_tstbit(&_jitc->function->regset, _L0))
     if (jit_regset_tstbit(&_jitc->function->regset, _L0))
-       ldxi(_L0_REGNO, _FP_REGNO, _jitc->function->stack + OFF(0));
+       ldxi(_L0_REGNO, _SP_REGNO, _jitc->function->stack + OFF(0));
     if (jit_regset_tstbit(&_jitc->function->regset, _L1))
     if (jit_regset_tstbit(&_jitc->function->regset, _L1))
-       ldxi(_L1_REGNO, _FP_REGNO, _jitc->function->stack + OFF(1));
+       ldxi(_L1_REGNO, _SP_REGNO, _jitc->function->stack + OFF(1));
     if (jit_regset_tstbit(&_jitc->function->regset, _L2))
     if (jit_regset_tstbit(&_jitc->function->regset, _L2))
-       ldxi(_L2_REGNO, _FP_REGNO, _jitc->function->stack + OFF(2));
+       ldxi(_L2_REGNO, _SP_REGNO, _jitc->function->stack + OFF(2));
     if (jit_regset_tstbit(&_jitc->function->regset, _L3))
     if (jit_regset_tstbit(&_jitc->function->regset, _L3))
-       ldxi(_L3_REGNO, _FP_REGNO, _jitc->function->stack + OFF(3));
+       ldxi(_L3_REGNO, _SP_REGNO, _jitc->function->stack + OFF(3));
     if (jit_regset_tstbit(&_jitc->function->regset, _L4))
     if (jit_regset_tstbit(&_jitc->function->regset, _L4))
-       ldxi(_L4_REGNO, _FP_REGNO, _jitc->function->stack + OFF(4));
+       ldxi(_L4_REGNO, _SP_REGNO, _jitc->function->stack + OFF(4));
     if (jit_regset_tstbit(&_jitc->function->regset, _L5))
     if (jit_regset_tstbit(&_jitc->function->regset, _L5))
-       ldxi(_L5_REGNO, _FP_REGNO, _jitc->function->stack + OFF(5));
+       ldxi(_L5_REGNO, _SP_REGNO, _jitc->function->stack + OFF(5));
     if (jit_regset_tstbit(&_jitc->function->regset, _L6))
     if (jit_regset_tstbit(&_jitc->function->regset, _L6))
-       ldxi(_L6_REGNO, _FP_REGNO, _jitc->function->stack + OFF(6));
+       ldxi(_L6_REGNO, _SP_REGNO, _jitc->function->stack + OFF(6));
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
     if (jit_regset_tstbit(&_jitc->function->regset, _L7))
-       ldxi(_L7_REGNO, _FP_REGNO, _jitc->function->stack + OFF(7));
+       ldxi(_L7_REGNO, _SP_REGNO, _jitc->function->stack + OFF(7));
     RESTOREI(0, 0, 0);
     RETL();
     NOP();
     RESTOREI(0, 0, 0);
     RETL();
     NOP();
@@ -2649,6 +2794,11 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        else
            abort();
     }
        else
            abort();
     }
+    else if (i.op.b == 1) {
+       assert(s30_p((label - instr) >> 2));
+       i.disp30.b = (label - instr) >> 2;
+       u.i[0] = i.v;
+    }
     else
        abort();
 }
     else
        abort();
 }
index 9531347..d0e7e81 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -498,6 +498,44 @@ _f3f(jit_state_t *_jit, jit_int32_t rd,
 }
 
 #  if __WORDSIZE == 64
 }
 
 #  if __WORDSIZE == 64
+/* Handle the special case of using all float registers, as exercised
+ * in check/carg.c.
+ * For example:
+ *     putargr_f JIT_F0 $ARG
+ * where JIT_F0 is %f32 and $ARG is %f31 and if %f30 (the mapping for %f31)
+ * is live, the jit_get_reg() call might return %f30, but, because it is
+ * live, will spill/reload it, generating assembly:
+ *
+ *     std  %f30, [ %fp + OFFS ]
+ *     fmovd  %f32, %f30
+ *     fmovs  %f30, %f31
+ *     ldd  [ %fp + OFFS ], %f30
+ *
+ * which basically becomes a noop, as it restores the old value.
+ */
+#define get_sng_reg(u)         _get_sng_reg(_jit, u)
+static jit_int32_t
+_get_sng_reg(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                reg, tmp;
+    /* Attempt to get a nospill register */
+    reg = jit_get_reg(CLASS_SNG | jit_class_nospill | jit_class_chk);
+    if (reg == JIT_NOREG) {
+       /* Will need to spill, so allow spilling it. */
+       reg = jit_get_reg(CLASS_SNG);
+       /* If the special condition happens, allocate another one.
+        * This will generate uglier machine code (code for floats
+        * is already ugly), but will work, by doing a double
+        * spill/reload; the first one being a noop.  */
+       if (rn(reg) == r0 - 1) {
+           tmp = reg;
+           reg = jit_get_reg(CLASS_SNG);
+           jit_unget_reg(tmp);
+       }
+    }
+    return (reg);
+}
+
 static void
 _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -507,7 +545,7 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
            if (single_precision_p(r1))
                FMOVS(r1, r0);
            else {
            if (single_precision_p(r1))
                FMOVS(r1, r0);
            else {
-               t1 = jit_get_reg(CLASS_SNG);
+               t1 = get_sng_reg(r0);
                movr_d(rn(t1), r1);
                FMOVS(rn(t1), r0);
                jit_unget_reg(t1);
                movr_d(rn(t1), r1);
                FMOVS(rn(t1), r0);
                jit_unget_reg(t1);
@@ -515,13 +553,13 @@ _movr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
        }
        else {
            if (single_precision_p(r1)) {
        }
        else {
            if (single_precision_p(r1)) {
-               t0 = jit_get_reg(CLASS_SNG);
+               t0 = get_sng_reg(r0);
                FMOVS(r1, rn(t0));
                movr_d(r0, rn(t0));
                jit_unget_reg(t0);
            }
            else {
                FMOVS(r1, rn(t0));
                movr_d(r0, rn(t0));
                jit_unget_reg(t0);
            }
            else {
-               t1 = jit_get_reg(CLASS_SNG);
+               t1 = get_sng_reg(r0);
                movr_d(rn(t1), r1);
                FMOVS(rn(t1), rn(t1));
                movr_d(r0, rn(t1));
                movr_d(rn(t1), r1);
                FMOVS(rn(t1), rn(t1));
                movr_d(r0, rn(t1));
@@ -1491,7 +1529,12 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     assert(_jitc->function->self.call & jit_call_varargs);
 
     /* Load argument. */
     assert(_jitc->function->self.call & jit_call_varargs);
 
     /* Load argument. */
+#if __WORDSIZE == 64
     ldr_d(r0, r1);
     ldr_d(r0, r1);
+#else
+    ldr_f(r0, r1);
+    ldxi_f(r0 + 1, r1, 4);
+#endif
 
     /* Update vararg stack pointer. */
     addi(r1, r1, 8);
 
     /* Update vararg stack pointer. */
     addi(r1, r1, 8);
index 265769d..95954d9 100644 (file)
@@ -1,10 +1,11 @@
 #if __WORDSIZE == 32
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 52
+#define JIT_INSTR_MAX 180
     0, /* data */
     0, /* live */
     0, /* align */
     0, /* save */
     0, /* load */
     0, /* data */
     0, /* live */
     0, /* align */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* #name */
     0, /* #note */
     0, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     4, /* va_start */
     8, /* va_arg */
-    8, /* va_arg_d */
+    12,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     0, /* va_end */
     4, /* addr */
     12,        /* addi */
     8, /* movi */
     16,        /* movnr */
     16,        /* movzr */
     8, /* movi */
     16,        /* movnr */
     16,        /* movzr */
+    24,        /* casr */
+    32,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    0, /* bswapr_ul */
     8, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     8, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     12,        /* bxsubr_u */
     12,        /* bxsubi_u */
     8, /* jmpr */
     12,        /* bxsubr_u */
     12,        /* bxsubi_u */
     8, /* jmpr */
-    16,        /* jmpi */
+    8, /* jmpi */
     8, /* callr */
     8, /* callr */
-    16,        /* calli */
+    8, /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    0, /* bswapr_ul */
-    24,        /* casr */
-    32,        /* casi */
+    176,       /* clo */
+    148,       /* clz */
+    180,       /* cto */
+    152,       /* ctz */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 116
+#define JIT_INSTR_MAX 216
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    4, /* align */
+    24,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     4, /* label */
     0, /* #name */
     0, /* #note */
     4, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     4, /* va_start */
     8, /* va_arg */
     8, /* va_arg_d */
     24,        /* movi */
     16,        /* movnr */
     16,        /* movzr */
     24,        /* movi */
     16,        /* movnr */
     16,        /* movzr */
+    24,        /* casr */
+    44,        /* casi */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     8, /* extr_i */
     8, /* extr_ui */
     8, /* extr_c */
     4, /* extr_uc */
     8, /* extr_s */
     8, /* extr_us */
     8, /* extr_i */
     8, /* extr_ui */
+    20,        /* bswapr_us */
+    52,        /* bswapr_ui */
+    116,       /* bswapr_ul */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
     24,        /* ldi_c */
     4, /* ldr_uc */
     8, /* htonr_us */
     8, /* htonr_ui */
     4, /* htonr_ul */
     4, /* ldr_c */
     24,        /* ldi_c */
     4, /* ldr_uc */
-    24,        /* ldi_uc */
+    28,        /* ldi_uc */
     4, /* ldr_s */
     4, /* ldr_s */
-    24,        /* ldi_s */
+    28,        /* ldi_s */
     4, /* ldr_us */
     4, /* ldr_us */
-    24,        /* ldi_us */
+    28,        /* ldi_us */
     4, /* ldr_i */
     4, /* ldr_i */
-    24,        /* ldi_i */
+    28,        /* ldi_i */
     4, /* ldr_ui */
     4, /* ldr_ui */
-    24,        /* ldi_ui */
+    28,        /* ldi_ui */
     4, /* ldr_l */
     4, /* ldr_l */
-    24,        /* ldi_l */
+    28,        /* ldi_l */
     4, /* ldxr_c */
     24,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_c */
     24,        /* ldxi_c */
     4, /* ldxr_uc */
     4, /* ldxr_l */
     24,        /* ldxi_l */
     4, /* str_c */
     4, /* ldxr_l */
     24,        /* ldxi_l */
     4, /* str_c */
-    24,        /* sti_c */
+    28,        /* sti_c */
     4, /* str_s */
     4, /* str_s */
-    24,        /* sti_s */
+    28,        /* sti_s */
     4, /* str_i */
     4, /* str_i */
-    24,        /* sti_i */
+    28,        /* sti_i */
     4, /* str_l */
     4, /* str_l */
-    24,        /* sti_l */
+    28,        /* sti_l */
     4, /* stxr_c */
     24,        /* stxi_c */
     4, /* stxr_s */
     4, /* stxr_c */
     24,        /* stxi_c */
     4, /* stxr_s */
     12,        /* bxsubr_u */
     12,        /* bxsubi_u */
     8, /* jmpr */
     12,        /* bxsubr_u */
     12,        /* bxsubi_u */
     8, /* jmpr */
-    32,        /* jmpi */
+    8, /* jmpi */
     8, /* callr */
     8, /* callr */
-    32,        /* calli */
+    40,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     16,        /* truncr_f_l */
     20,        /* extr_f */
     12,        /* extr_d_f */
     16,        /* truncr_f_l */
     20,        /* extr_f */
     12,        /* extr_d_f */
-    16,        /* movr_f */
+    24,        /* movr_f */
     32,        /* movi_f */
     8, /* ldr_f */
     32,        /* movi_f */
     8, /* ldr_f */
-    28,        /* ldi_f */
+    32,        /* ldi_f */
     8, /* ldxr_f */
     28,        /* ldxi_f */
     8, /* str_f */
     8, /* ldxr_f */
     28,        /* ldxi_f */
     8, /* str_f */
-    28,        /* sti_f */
+    32,        /* sti_f */
     8, /* stxr_f */
     28,        /* stxi_f */
     20,        /* bltr_f */
     8, /* stxr_f */
     28,        /* stxi_f */
     20,        /* bltr_f */
     20,        /* bler_f */
     44,        /* blei_f */
     28,        /* beqr_f */
     20,        /* bler_f */
     44,        /* blei_f */
     28,        /* beqr_f */
-    60,        /* beqi_f */
+    52,        /* beqi_f */
     20,        /* bger_f */
     44,        /* bgei_f */
     20,        /* bgtr_f */
     44,        /* bgti_f */
     20,        /* bner_f */
     20,        /* bger_f */
     44,        /* bgei_f */
     20,        /* bgtr_f */
     44,        /* bgti_f */
     20,        /* bner_f */
-    44,        /* bnei_f */
+    60,        /* bnei_f */
     20,        /* bunltr_f */
     44,        /* bunlti_f */
     20,        /* bunler_f */
     20,        /* bunltr_f */
     44,        /* bunlti_f */
     20,        /* bunler_f */
     4, /* movr_d */
     32,        /* movi_d */
     4, /* ldr_d */
     4, /* movr_d */
     32,        /* movi_d */
     4, /* ldr_d */
-    24,        /* ldi_d */
+    28,        /* ldi_d */
     4, /* ldxr_d */
     24,        /* ldxi_d */
     4, /* str_d */
     4, /* ldxr_d */
     24,        /* ldxi_d */
     4, /* str_d */
-    24,        /* sti_d */
+    28,        /* sti_d */
     4, /* stxr_d */
     24,        /* stxi_d */
     12,        /* bltr_d */
     4, /* stxr_d */
     24,        /* stxi_d */
     12,        /* bltr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    20,        /* bswapr_us */
-    52,        /* bswapr_ui */
-    116,       /* bswapr_ul */
-    24,        /* casr */
-    44,        /* casi */
+    216,       /* clo */
+    188,       /* clz */
+    204,       /* cto */
+    176,       /* ctz */
 #endif /* __WORDSIZE */
 #endif /* __WORDSIZE */
index cd45d23..9e837d8 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2013-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
  *     Paulo Cesar Pereira de Andrade
  */
 
  *     Paulo Cesar Pereira de Andrade
  */
 
+/* Handling SIGILL should not be done by Lightning; either use the code
+ * below as a sample, or use another approach to set jit_cpu.lzcnt.
+ */
+#define CHECK_LZCNT    0
+
+#if CHECK_LZCNT
+#include <signal.h>
+#include <setjmp.h>
+#endif
+
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 6)
 #if __WORDSIZE == 32
 #  define jit_arg_d_reg_p(i)           ((i) >= 0 && (i) < 5)
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 6)
 #if __WORDSIZE == 32
 #  define jit_arg_d_reg_p(i)           ((i) >= 0 && (i) < 5)
@@ -40,11 +50,13 @@ static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define PROTO                          1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
 #define PROTO                          1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
+#  include "jit_fallback.c"
 #undef PROTO
 
 /*
  * Initialization
  */
 #undef PROTO
 
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { 0x00,                            "%g0" },
     { 0x01,                            "%g1" },
 jit_register_t         _rvs[] = {
     { 0x00,                            "%g0" },
     { 0x01,                            "%g1" },
@@ -147,13 +159,45 @@ jit_register_t            _rvs[] = {
 #  endif
     { _NOREG,                          "<none>" },
 };
 #  endif
     { _NOREG,                          "<none>" },
 };
+#if CHECK_LZCNT
+sigjmp_buf             jit_env;
+#endif
 
 /*
  * Implementation
  */
 
 /*
  * Implementation
  */
+#if CHECK_LZCNT
+static void
+sigill_handler(int signum)
+{
+    jit_cpu.lzcnt = 0;
+    siglongjmp(jit_env, 1);
+}
+#endif
+
 void
 jit_get_cpu(void)
 {
 void
 jit_get_cpu(void)
 {
+#if CHECK_LZCNT
+    int                        g2;
+    struct             sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    if (old_action.sa_handler != SIG_IGN) {
+       sigaction(SIGILL, &new_action, NULL);
+       if (!sigsetjmp(jit_env, 1)) {
+           jit_cpu.lzcnt = 1;
+           /* lzcnt %g2, %g2 */
+           __asm__ volatile("mov %%g2, %0; .long 0xa3b0021; mov %0, %%g2"
+                            : "=r" (g2));
+           sigaction(SIGILL, &old_action, NULL);
+       }
+    }
+#else
+    jit_cpu.lzcnt = 0;
+#endif
 }
 
 void
 }
 
 void
@@ -184,7 +228,7 @@ _jit_prolog(jit_state_t *_jit)
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
     _jitc->function->self.argi = _jitc->function->self.argf =
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
     _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+       _jitc->function->self.alen = 0;
     /* float conversion */
 #  if __WORDSIZE == 32
     _jitc->function->self.aoff = -8;
     /* float conversion */
 #  if __WORDSIZE == 32
     _jitc->function->self.aoff = -8;
@@ -265,20 +309,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -339,12 +381,13 @@ jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 #  if __WORDSIZE == 32
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 #  if __WORDSIZE == 32
-    if (u->code == jit_code_arg || u->code == jit_code_arg_f)
+    if ((u->code >= jit_code_arg_c && u->code <= jit_code_arg) ||
+       u->code == jit_code_arg_f)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d);
     return (jit_arg_d_reg_p(u->u.w));
 #  else
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d);
     return (jit_arg_d_reg_p(u->u.w));
 #  else
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f);
     return (jit_arg_d_reg_p(u->u.w));
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_d || u->code == jit_code_arg_f);
     return (jit_arg_d_reg_p(u->u.w));
@@ -379,11 +422,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
     if (jit_arg_reg_p(_jitc->function->self.argi))
        offset = _jitc->function->self.argi++;
     else {
@@ -394,7 +441,7 @@ _jit_arg(jit_state_t *_jit)
        offset = BIAS(_jitc->function->self.size);
        _jitc->function->self.size += sizeof(jit_word_t);
     }
        offset = BIAS(_jitc->function->self.size);
        _jitc->function->self.size += sizeof(jit_word_t);
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -471,7 +518,7 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _I0 + v->u.w);
     jit_inc_synth_wp(getarg_c, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, _I0 + v->u.w);
@@ -484,7 +531,7 @@ _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _I0 + v->u.w);
     jit_inc_synth_wp(getarg_uc, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, _I0 + v->u.w);
@@ -497,7 +544,7 @@ _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _I0 + v->u.w);
     jit_inc_synth_wp(getarg_s, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, _I0 + v->u.w);
@@ -510,7 +557,7 @@ _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _I0 + v->u.w);
     jit_inc_synth_wp(getarg_us, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, _I0 + v->u.w);
@@ -523,7 +570,7 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #  if __WORDSIZE == 64
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w)) {
 #  if __WORDSIZE == 64
@@ -542,7 +589,7 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _I0 + v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, _I0 + v->u.w);
@@ -555,7 +602,7 @@ _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _I0 + v->u.w);
     jit_inc_synth_wp(getarg_i, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, _I0 + v->u.w);
@@ -566,10 +613,10 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 #  endif
 
 void
 #  endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_I0 + v->u.w, u);
     else
     if (jit_arg_reg_p(v->u.w))
        jit_movr(_I0 + v->u.w, u);
     else
@@ -578,11 +625,11 @@ _jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_I0 + v->u.w, u);
     else {
     if (jit_arg_reg_p(v->u.w))
        jit_movi(_I0 + v->u.w, u);
     else {
@@ -795,9 +842,9 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
 }
 
 void
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_O0 + _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movr(_O0 + _jitc->function->call.argi, u);
@@ -816,10 +863,10 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 }
 
 void
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_O0 + _jitc->function->call.argi, u);
     jit_link_prepare();
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
        jit_movi(_O0 + _jitc->function->call.argi, u);
@@ -1193,6 +1240,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1326,6 +1374,9 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_note:         case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
@@ -1506,6 +1557,10 @@ _emit_code(jit_state_t *_jit)
                break;
                case_rr(neg,);
                case_rr(com,);
                break;
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
@@ -1723,7 +1778,12 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s22_p(word >> 2))
+                           word = jmpi(_jit->pc.w);
+                       else
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -1738,9 +1798,17 @@ _emit_code(jit_state_t *_jit)
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
                    temp = node->u.n;
                    assert(temp->code == jit_code_label ||
                           temp->code == jit_code_epilog);
-                   word = calli_p(temp->u.w);
-                   if (!(temp->flag & jit_flag_patch))
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if (s30_p(word >> 2))
+                           word = calli(_jit->pc.w);
+                       else
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                        patch(word, node);
+                   }
                }
                else
                    calli(node->u.w);
                }
                else
                    calli(node->u.w);
@@ -1749,6 +1817,7 @@ _emit_code(jit_state_t *_jit)
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
@@ -1769,6 +1838,16 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
@@ -1793,11 +1872,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#if __WORDSIZE == 64
+          case jit_code_arg_l:
+#endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -1807,10 +1898,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -1882,6 +1989,7 @@ _emit_code(jit_state_t *_jit)
 #define CODE                           1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
 #define CODE                           1
 #  include "jit_sparc-cpu.c"
 #  include "jit_sparc-fpu.c"
+#  include "jit_fallback.c"
 #undef CODE
 
 void
 #undef CODE
 
 void
index 1a473de..f0e4155 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
 #  else
 #    define il(l)                      ii(l)
 #  endif
 #  else
 #    define il(l)                      ii(l)
 #  endif
-#  define patch_abs(instr, label)                                      \
-       *(jit_word_t *)(instr - sizeof(jit_word_t)) = label
-#  define patch_rel(instr, label)                                      \
-       *(jit_int32_t *)(instr - 4) = label - instr
-#  define patch_rel_char(instr, label)                                 \
-       *(jit_int8_t *)(instr - 1) = label - instr
 #  define rex(l, w, r, x, b)           _rex(_jit, l, w, r, x, b)
 static void
 _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define rex(l, w, r, x, b)           _rex(_jit, l, w, r, x, b)
 static void
 _rex(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
@@ -186,7 +180,8 @@ static void _addi(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #define addci(r0, r1, i0)              _addci(_jit, r0, r1, i0)
 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 static void _addcr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #define addci(r0, r1, i0)              _addci(_jit, r0, r1, i0)
 static void _addci(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
-#  define iaddxr(r0, r1)               alur(X86_ADC, r0, r1)
+#  define iaddxr(r0, r1)               _iaddxr(_jit, r0, r1)
+static void _iaddxr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
 static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define iaddxi(r0, i0)               alui(X86_ADC, r0, i0)
 #  define addxr(r0, r1, r2)            _addxr(_jit, r0, r1, r2)
 static void _addxr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define iaddxi(r0, i0)               alui(X86_ADC, r0, i0)
@@ -308,6 +303,14 @@ static void _incr(jit_state_t*, jit_int32_t, jit_int32_t);
 #    define decr(r0, r1)               _decr(_jit, r0, r1)
 static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  endif
 #    define decr(r0, r1)               _decr(_jit, r0, r1)
 static void _decr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  endif
+#  define clor(r0, r1)                 _clor(_jit, r0, r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define clzr(r0, r1)                 _clzr(_jit, r0, r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctor(r0, r1)                 _ctor(_jit, r0, r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define ctzr(r0, r1)                 _ctzr(_jit, r0, r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define cr(code, r0, r1, r2)         _cr(_jit, code, r0, r1, r2)
 static void
 _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define cr(code, r0, r1, r2)         _cr(_jit, code, r0, r1, r2)
 static void
 _cr(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t, jit_int32_t);
@@ -358,7 +361,13 @@ static void _movr(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define imovi(r0, i0)                        _imovi(_jit, r0, i0)
 static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movi(r0, i0)                 _movi(_jit, r0, i0)
 #  define imovi(r0, i0)                        _imovi(_jit, r0, i0)
 static void _imovi(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movi(r0, i0)                 _movi(_jit, r0, i0)
-static void _movi(jit_state_t*, jit_int32_t, jit_word_t);
+static
+#  if CAN_RIP_ADDRESS
+jit_word_t
+#  else
+void
+#  endif
+_movi(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movcr(r0, r1)                        _movcr(_jit, r0, r1)
 #  define movi_p(r0, i0)               _movi_p(_jit, r0, i0)
 static jit_word_t _movi_p(jit_state_t*, jit_int32_t, jit_word_t);
 #  define movcr(r0, r1)                        _movcr(_jit, r0, r1)
@@ -547,7 +556,7 @@ static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define jng(i0)                      jcc(X86_CC_NG, i0)
 #  define jg(i0)                       jcc(X86_CC_G, i0)
 #  define jnle(i0)                     jcc(X86_CC_NLE, i0)
 #  define jng(i0)                      jcc(X86_CC_NG, i0)
 #  define jg(i0)                       jcc(X86_CC_G, i0)
 #  define jnle(i0)                     jcc(X86_CC_NLE, i0)
-static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jcc(jit_state_t*, jit_int32_t, jit_word_t);
 #  define jccs(code, i0)               _jccs(_jit, code, i0)
 #  define jos(i0)                      jccs(X86_CC_O, i0)
 #  define jnos(i0)                     jccs(X86_CC_NO, i0)
 #  define jccs(code, i0)               _jccs(_jit, code, i0)
 #  define jos(i0)                      jccs(X86_CC_O, i0)
 #  define jnos(i0)                     jccs(X86_CC_NO, i0)
@@ -579,13 +588,15 @@ static void _jcc(jit_state_t*, jit_int32_t, jit_word_t);
 #  define jngs(i0)                     jccs(X86_CC_NG, i0)
 #  define jgs(i0)                      jccs(X86_CC_G, i0)
 #  define jnles(i0)                    jccs(X86_CC_NLE, i0)
 #  define jngs(i0)                     jccs(X86_CC_NG, i0)
 #  define jgs(i0)                      jccs(X86_CC_G, i0)
 #  define jnles(i0)                    jccs(X86_CC_NLE, i0)
-static void _jccs(jit_state_t*, jit_int32_t, jit_word_t);
+static jit_word_t _jccs(jit_state_t*, jit_int32_t, jit_word_t);
 #  define jcr(code, i0, r0, r1)                _jcr(_jit, code, i0, r0, r1)
 #  define jcr(code, i0, r0, r1)                _jcr(_jit, code, i0, r0, r1)
-static void _jcr(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
+static jit_word_t _jcr(jit_state_t*,
+                      jit_int32_t,jit_word_t,jit_int32_t,jit_int32_t);
 #  define jci(code, i0, r0, i1)                _jci(_jit, code, i0, r0, i1)
 #  define jci(code, i0, r0, i1)                _jci(_jit, code, i0, r0, i1)
-static void _jci(jit_state_t*,jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
+static jit_word_t _jci(jit_state_t*,
+                      jit_int32_t,jit_word_t,jit_int32_t,jit_word_t);
 #  define jci0(code, i0, r0)           _jci0(_jit, code, i0, r0)
 #  define jci0(code, i0, r0)           _jci0(_jit, code, i0, r0)
-static void _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
+static jit_word_t _jci0(jit_state_t*, jit_int32_t, jit_word_t, jit_int32_t);
 #  define bltr(i0, r0, r1)             _bltr(_jit, i0, r0, r1)
 static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define blti(i0, r0, i1)             _blti(_jit, i0, r0, i1)
 #  define bltr(i0, r0, r1)             _bltr(_jit, i0, r0, r1)
 static jit_word_t _bltr(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define blti(i0, r0, i1)             _blti(_jit, i0, r0, i1)
@@ -687,7 +698,7 @@ static jit_word_t _jmpi_p(jit_state_t*, jit_word_t);
 #    define jmpi_p(i0)                 jmpi(i0)
 #  endif
 #  define jmpsi(i0)                    _jmpsi(_jit, i0)
 #    define jmpi_p(i0)                 jmpi(i0)
 #  endif
 #  define jmpsi(i0)                    _jmpsi(_jit, i0)
-static void _jmpsi(jit_state_t*, jit_uint8_t);
+static jit_word_t _jmpsi(jit_state_t*, jit_uint8_t);
 #  define prolog(node)                 _prolog(_jit, node)
 static void _prolog(jit_state_t*, jit_node_t*);
 #  define epilog(node)                 _epilog(_jit, node)
 #  define prolog(node)                 _prolog(_jit, node)
 static void _prolog(jit_state_t*, jit_node_t*);
 #  define epilog(node)                 _epilog(_jit, node)
@@ -698,8 +709,8 @@ static void _vastart(jit_state_t*, jit_int32_t);
 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define vaarg_d(r0, r1, i0)          _vaarg_d(_jit, r0, r1, i0)
 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
 static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
 #  define vaarg_d(r0, r1, i0)          _vaarg_d(_jit, r0, r1, i0)
 static void _vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_bool_t);
-#  define patch_at(node, instr, label) _patch_at(_jit, node, instr, label)
-static void _patch_at(jit_state_t*, jit_node_t*, jit_word_t, jit_word_t);
+#  define patch_at(instr, label)       _patch_at(_jit, instr, label)
+static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
 #  if !defined(HAVE_FFSL)
 #    if __X32
 #      define ffsl(i)                  __builtin_ffs(i)
 #  if !defined(HAVE_FFSL)
 #    if __X32
 #      define ffsl(i)                  __builtin_ffs(i)
@@ -735,11 +746,16 @@ _rx(jit_state_t *_jit, jit_int32_t rd, jit_int32_t md,
 {
     if (ri == _NOREG) {
        if (rb == _NOREG) {
 {
     if (ri == _NOREG) {
        if (rb == _NOREG) {
-#if __X32
-           mrm(0x00, r7(rd), 0x05);
-#else
-           mrm(0x00, r7(rd), 0x04);
-           sib(_SCL1, 0x04, 0x05);
+           /* Use ms == _SCL8 to signal a %rip-relative displacement */
+#if __X64
+           if (ms == _SCL8)
+#endif
+               mrm(0x00, r7(rd), 0x05);
+#if __X64
+           else {
+               mrm(0x00, r7(rd), 0x04);
+               sib(_SCL1, 0x04, 0x05);
+           }
 #endif
            ii(md);
        }
 #endif
            ii(md);
        }
@@ -1036,6 +1052,49 @@ _addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
     }
 }
 
+static void
+_iaddxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    /* FIXME: this does not do what I expected for the simple test case:
+     *         mov  $0xffffffffffffffff, %rax  -- rax = 0xffffffffffffffff (-1)
+     *         mov  $0xffffffffffffffff, %r10  -- r10 = 0xffffffffffffffff (-1)
+     *         mov  $0x1, %r11d                -- r11 = 1
+     *         xor  %rbx, %rbx                 -- rbx = 0
+     * (gdb) p $eflags
+     * $1 = [ PF ZF IF ]
+     *         add  %r11, %rax                 -- rax = 0x10000000000000000 (0)
+     *                             does not fit in 64 bit ^
+     * (gdb) p $eflags
+     * $2 = [ CF PF AF ZF IF ]
+     *         adcx %r10, %rbx                 -- r10 = 0xffffffffffffffff (-1)
+     * (gdb) p $eflags
+     * $3 = [ CF PF AF ZF IF ]
+     * (gdb) p/x $r10
+     * $4 = 0xffffffffffffffff
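+     * but r10 was expected to be zero, as the result is:
+     * -1 (%r10) + 0 (%rbx) + carry (!!eflags.CF)
+     * FIXME: maybe ADCX should only be used from the third operation onward,
+     * that is, after the first ADC? In either case, the add -1+0+carry should
+     * have used and consumed the carry? At least this is what is expected
+     * in Lightning...
+     * NOTE(review): ADCX writes its ModRM.reg operand, and the disabled code
+     * below places r1 there, so %rbx (not %r10) looks like the destination
+     * in the transcript above -- TODO confirm against the Intel SDM.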
+     */
+#if 0
+    /* Significantly longer instruction, but avoids CPU stalls as only
+     * the carry flag is used in a sequence. */
+    if (jit_cpu.adx) {
+       /* ADCX */
+       ic(0x66);
+       rex(0, WIDE, r1, _NOREG, r0);
+       ic(0x0f);
+       ic(0x38);
+       ic(0xf6);
+       mrm(0x03, r7(r1), r7(r0));
+    }
+    else
+#endif
+       alur(X86_ADC, r0, r1);
+}
+
 static void
 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
 static void
 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
@@ -1051,7 +1110,12 @@ static void
 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
 _addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+    if (
+#if 0
+       /* Do not mix ADC and ADCX */
+       !jit_cpu.adx &&
+#endif
+       can_sign_extend_int_p(i0)) {
        movr(r0, r1);
        iaddxi(r0, i0);
     }
        movr(r0, r1);
        iaddxi(r0, i0);
     }
@@ -1913,6 +1977,88 @@ _decr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
 }
 #endif
 
+static void
+_clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);
+    clzr(r0, r0);
+}
+
+static void
+_clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w, x;
+    /* LZCNT */
+    if (jit_cpu.abm)
+       ic(0xf3);
+    /* else BSR */
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xbd);
+    mrm(0x3, r7(r0), r7(r1));
+    if (!jit_cpu.abm) {
+       /* jump if undefined: r1 == 0 */
+       w = jccs(X86_CC_E, _jit->pc.w);
+       /* count leading zeros */
+       rsbi(r0, r0, __WORDSIZE - 1);
+       /* done */
+       x = jmpsi(_jit->pc.w);
+       /* if r1 == 0 */
+       patch_at(w, _jit->pc.w);
+       movi(r0, __WORDSIZE);
+       /* not undefined */
+       patch_at(x, _jit->pc.w);
+    }
+    /* LZCNT has defined behavior for value zero and counts leading zeros */
+}
+
+static void
+_ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    comr(r0, r1);
+    ctzr(r0, r0);
+}
+
+static void
+_ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    if (!jit_cpu.abm) {
+       if (jit_cmov_p())
+           t0 = jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk);
+       else
+           t0 = _NOREG;
+       if (t0 != _NOREG)
+           movi(rn(t0), __WORDSIZE);
+    }
+    /* TZCNT */
+    if (jit_cpu.abm)
+       ic(0xf3);
+    /* else BSF */
+    rex(0, WIDE, r0, _NOREG, r1);
+    ic(0x0f);
+    ic(0xbc);
+    mrm(0x3, r7(r0), r7(r1));
+    if (!jit_cpu.abm) {
+       /* No conditional move, or a temporary would need spill/reload */
+       if (t0 == _NOREG) {
+           w = jccs(X86_CC_E, _jit->pc.w);
+           movi(r0, __WORDSIZE);
+           patch_at(w, _jit->pc.w);
+       }
+       else {
+           /* CMOVE */
+           rex(0, WIDE, r0, _NOREG, rn(t0));
+           ic(0x0f);
+           ic(0x44);
+           mrm(0x3, r7(r0), r7(rn(t0)));
+           jit_unget_reg(t0);
+       }
+    }
+    /* TZCNT has defined behavior for value zero */
+}
+
 static void
 _cr(jit_state_t *_jit,
     jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 static void
 _cr(jit_state_t *_jit,
     jit_int32_t code, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
@@ -2162,6 +2308,12 @@ _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        ii(i0);
 #  if !__X64_32
     }
        ii(i0);
 #  if !__X64_32
     }
+    else if (can_sign_extend_int_p(i0)) {
+       rex(0, 1, _NOREG, _NOREG, r0);
+       ic(0xc7);
+       ic(0xc0 | r7(r0));
+       ii(i0);
+    }
     else {
        rex(0, 1, _NOREG, _NOREG, r0);
        ic(0xb8 | r7(r0));
     else {
        rex(0, 1, _NOREG, _NOREG, r0);
        ic(0xb8 | r7(r0));
@@ -2174,22 +2326,45 @@ _imovi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 #endif
 }
 
 #endif
 }
 
+#if CAN_RIP_ADDRESS
+static jit_word_t
+#else
 static void
 static void
+#endif
 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
+#if CAN_RIP_ADDRESS
+    jit_word_t         w, rel;
+    w = _jit->pc.w;
+    rel = i0 - (w + 8);
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       /* lea rel(%rip), %r0 */
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       w = _jit->pc.w;
+       ic(0x8d);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
     if (i0)
        imovi(r0, i0);
     else
        ixorr(r0, r0);
     if (i0)
        imovi(r0, i0);
     else
        ixorr(r0, r0);
+#if CAN_RIP_ADDRESS
+    return (w);
+#endif
 }
 
 static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
 }
 
 static jit_word_t
 _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
+    jit_word_t         w;
     rex(0, WIDE, _NOREG, _NOREG, r0);
     rex(0, WIDE, _NOREG, _NOREG, r0);
+    w = _jit->pc.w;
     ic(0xb8 | r7(r0));
     il(i0);
     ic(0xb8 | r7(r0));
     il(i0);
-    return (_jit->pc.w);
+    return (w);
 }
 
 static void
 }
 
 static void
@@ -2404,7 +2579,18 @@ static void
 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xbe);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbe);
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbe);
@@ -2431,7 +2617,18 @@ static void
 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xb6);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb6);
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb6);
@@ -2458,7 +2655,18 @@ static void
 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xbf);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbf);
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xbf);
@@ -2485,7 +2693,18 @@ static void
 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x0f);
+       ic(0xb7);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb7);
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x0f);
        ic(0xb7);
@@ -2516,7 +2735,17 @@ static void
 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x63);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
 #if __X64
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x63);
 #if __X64
        rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x63);
@@ -2547,7 +2776,17 @@ static void
 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#  if !__X64_32
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, 0, r0, _NOREG, _NOREG);
+       ic(0x63);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x63);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x63);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2555,7 +2794,11 @@ _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
+#  if __X64_32
+       ldr_i(r0, rn(reg));
+#  else
        ldr_ui(r0, rn(reg));
        ldr_ui(r0, rn(reg));
+#  endif
        jit_unget_reg(reg);
     }
 }
        jit_unget_reg(reg);
     }
 }
@@ -2573,8 +2816,15 @@ static void
 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x8b);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else if (can_sign_extend_int_p(i0)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x8b);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
     }
        ic(0x8b);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
     }
@@ -2778,7 +3028,11 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
+#  if __X64_32
+       ldxr_i(r0, r1, rn(reg));
+#  else
        ldxr_ui(r0, r1, rn(reg));
        ldxr_ui(r0, r1, rn(reg));
+#  endif
        jit_unget_reg(reg);
     }
 }
        jit_unget_reg(reg);
     }
 }
@@ -2834,7 +3088,27 @@ static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 16 : rel + 16;
+    if (can_sign_extend_int_p(rel)) {
+       if (reg8_p(r0)) {
+           rex(0, 0, r0, _NOREG, _NOREG);
+           ic(0x88);
+           rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+       }
+       else {
+           reg = jit_get_reg(jit_class_gpr|jit_class_rg8);
+           movr(rn(reg), r0);
+           rex(0, 0, rn(reg), _NOREG, _NOREG);
+           ic(0x88);
+           rx(rn(reg), i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+           jit_unget_reg(reg);
+       }
+    }
+    else
+#endif
+    if (address_p(i0)) {
        if (reg8_p(r0)) {
            rex(0, 0, r0, _NOREG, _NOREG);
            ic(0x88);
        if (reg8_p(r0)) {
            rex(0, 0, r0, _NOREG, _NOREG);
            ic(0x88);
@@ -2870,7 +3144,18 @@ static void
 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 _sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       ic(0x66);
+       rex(0, 0, r0, _NOREG, _NOREG);
+       ic(0x89);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        ic(0x66);
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x89);
        ic(0x66);
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x89);
@@ -2896,7 +3181,17 @@ static void
 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 _sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (can_sign_extend_int_p(i0)) {
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, 0, r0, _NOREG, _NOREG);
+       ic(0x89);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
+    if (address_p(i0)) {
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x89);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
        rex(0, 0, r0, _NOREG, _NOREG);
        ic(0x89);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
@@ -2922,8 +3217,18 @@ static void
 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - _jit->pc.w;
+    rel = rel < 0 ? rel - 8 : rel + 8;
+    if (can_sign_extend_int_p(rel)) {
+       rex(0, WIDE, r0, _NOREG, _NOREG);
+       ic(0x89);
+       rx(r0, i0 - (_jit->pc.w + 5), _NOREG, _NOREG, _SCL8);
+    }
+    else
+#endif
     if (can_sign_extend_int_p(i0)) {
     if (can_sign_extend_int_p(i0)) {
-       rex(0, 1, r0, _NOREG, _NOREG);
+       rex(0, WIDE, r0, _NOREG, _NOREG);
        ic(0x89);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
     }
        ic(0x89);
        rx(r0, i0, _NOREG, _NOREG, _SCL1);
     }
@@ -3084,208 +3389,221 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
 }
 #endif
 
-static void
+static jit_word_t
 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
+    jit_word_t         d;
     jit_word_t         w;
     jit_word_t         w;
+    w = _jit->pc.w;
+    d = i0 - (w + 1);
     ic(0x70 | code);
     ic(0x70 | code);
-    w = i0 - (_jit->pc.w + 1);
-    ic(w);
+    ic(d);
+    return (w);
 }
 
 }
 
-static void
+static jit_word_t
 _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
 _jcc(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
+    jit_word_t         d;
     jit_word_t         w;
     jit_word_t         w;
+    w = _jit->pc.w;
     ic(0x0f);
     ic(0x0f);
+    d = i0 - (w + 6);
     ic(0x80 | code);
     ic(0x80 | code);
-    w = i0 - (_jit->pc.w + 4);
-    ii(w);
+    ii(d);
+    return (w);
 }
 
 }
 
-static void
+static jit_word_t
 _jcr(jit_state_t *_jit,
      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     alur(X86_CMP, r0, r1);
 _jcr(jit_state_t *_jit,
      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     alur(X86_CMP, r0, r1);
-    jcc(code, i0);
+    return (jcc(code, i0));
 }
 
 }
 
-static void
+static jit_word_t
 _jci(jit_state_t *_jit,
      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     alui(X86_CMP, r0, i1);
 _jci(jit_state_t *_jit,
      jit_int32_t code, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
     alui(X86_CMP, r0, i1);
-    jcc(code, i0);
+    return (jcc(code, i0));
 }
 
 }
 
-static void
+static jit_word_t
 _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
 {
     testr(r0, r0);
 _jci0(jit_state_t *_jit, jit_int32_t code, jit_word_t i0, jit_int32_t r0)
 {
     testr(r0, r0);
-    jcc(code, i0);
+    return (jcc(code, i0));
 }
 
 static jit_word_t
 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_L, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_L, i0, r0, r1));
 }
 
 static jit_word_t
 _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_L, i0, r0, i1);
-    else               jci0(X86_CC_S, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_L, i0, r0, i1);
+    else               w = jci0(X86_CC_S, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_B, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_B, i0, r0, r1));
 }
 
 static jit_word_t
 _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_B, i0, r0, i1);
-    else               jci0(X86_CC_B, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_B, i0, r0, i1);
+    else               w = jci0(X86_CC_B, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_LE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_LE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_LE, i0, r0, i1);
-    else               jci0(X86_CC_LE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_LE, i0, r0, i1);
+    else               w = jci0(X86_CC_LE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_BE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_BE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_BE, i0, r0, i1);
-    else               jci0(X86_CC_BE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_BE, i0, r0, i1);
+    else               w = jci0(X86_CC_BE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_E, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_E, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_E, i0, r0, i1);
-    else               jci0(X86_CC_E, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_E, i0, r0, i1);
+    else               w = jci0(X86_CC_E, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_GE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_GE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_GE, i0, r0, i1);
-    else               jci0(X86_CC_NS, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_GE, i0, r0, i1);
+    else               w = jci0(X86_CC_NS, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    if (r0 == r1)      jmpi(i0);
-    else               jcr (X86_CC_AE, i0, r0, r1);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (r0 == r1)      w = jmpi(i0);
+    else               w = jcr (X86_CC_AE, i0, r0, r1);
+    return (w);
 }
 
 static jit_word_t
 _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_AE, i0, r0, i1);
-    else               jmpi(i0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_AE, i0, r0, i1);
+    else               w = jmpi(i0);
+    return (w);
 }
 
 static jit_word_t
 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_G, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_G, i0, r0, r1));
 }
 
 static jit_word_t
 _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    jci(X86_CC_G, i0, r0, i1);
-    return (_jit->pc.w);
+    return (jci(X86_CC_G, i0, r0, i1));
 }
 
 static jit_word_t
 _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_A, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_A, i0, r0, r1));
 }
 
 static jit_word_t
 _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_A, i0, r0, i1);
-    else               jci0(X86_CC_NE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_A, i0, r0, i1);
+    else               w = jci0(X86_CC_NE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 
 static jit_word_t
 _bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
-    jcr(X86_CC_NE, i0, r0, r1);
-    return (_jit->pc.w);
+    return (jcr(X86_CC_NE, i0, r0, r1));
 }
 
 static jit_word_t
 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
 }
 
 static jit_word_t
 _bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
 {
-    if (i1)            jci (X86_CC_NE, i0, r0, i1);
-    else               jci0(X86_CC_NE, i0, r0);
-    return (_jit->pc.w);
+    jit_word_t         w;
+    if (i1)            w = jci (X86_CC_NE, i0, r0, i1);
+    else               w = jci0(X86_CC_NE, i0, r0);
+    return (w);
 }
 
 static jit_word_t
 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     testr(r0, r1);
 }
 
 static jit_word_t
 _bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     testr(r0, r1);
-    jnz(i0);
-    return (_jit->pc.w);
+    return (jnz(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3300,16 +3618,14 @@ _bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        testr(r0, rn(reg));
        jit_unget_reg(reg);
     }
        testr(r0, rn(reg));
        jit_unget_reg(reg);
     }
-    jnz(i0);
-    return (_jit->pc.w);
+    return (jnz(i0));
 }
 
 static jit_word_t
 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     testr(r0, r1);
 }
 
 static jit_word_t
 _bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     testr(r0, r1);
-    jz(i0);
-    return (_jit->pc.w);
+    return (jz(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3324,16 +3640,14 @@ _bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        testr(r0, rn(reg));
        jit_unget_reg(reg);
     }
        testr(r0, rn(reg));
        jit_unget_reg(reg);
     }
-    jz(i0);
-    return (_jit->pc.w);
+    return (jz(i0));
 }
 
 static jit_word_t
 _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
 }
 
 static jit_word_t
 _boaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jo(i0);
-    return (_jit->pc.w);
+    return (jo(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3342,8 +3656,7 @@ _boaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jo(i0);
-       return (_jit->pc.w);
+       return (jo(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3355,8 +3668,7 @@ static jit_word_t
 _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
 _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jc(i0);
-    return (_jit->pc.w);
+    return (jc(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3365,8 +3677,7 @@ _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jc(i0);
-       return (_jit->pc.w);
+       return (jc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3378,8 +3689,7 @@ static jit_word_t
 _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
 _bxaddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jno(i0);
-    return (_jit->pc.w);
+    return (jno(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3388,8 +3698,7 @@ _bxaddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jno(i0);
-       return (_jit->pc.w);
+       return (jno(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3401,8 +3710,7 @@ static jit_word_t
 _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
 _bxaddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     iaddr(r0, r1);
-    jnc(i0);
-    return (_jit->pc.w);
+    return (jnc(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3411,8 +3719,7 @@ _bxaddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        iaddi(r0, i1);
-       jnc(i0);
-       return (_jit->pc.w);
+       return (jnc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3424,8 +3731,7 @@ static jit_word_t
 _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
 _bosubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jo(i0);
-    return (_jit->pc.w);
+    return (jo(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3434,8 +3740,7 @@ _bosubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jo(i0);
-       return (_jit->pc.w);
+       return (jo(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3447,8 +3752,7 @@ static jit_word_t
 _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
 _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jc(i0);
-    return (_jit->pc.w);
+    return (jc(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3457,8 +3761,7 @@ _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jc(i0);
-       return (_jit->pc.w);
+       return (jc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3470,8 +3773,7 @@ static jit_word_t
 _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
 _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jno(i0);
-    return (_jit->pc.w);
+    return (jno(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3480,8 +3782,7 @@ _bxsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jno(i0);
-       return (_jit->pc.w);
+       return (jno(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3493,8 +3794,7 @@ static jit_word_t
 _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
 _bxsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     isubr(r0, r1);
-    jnc(i0);
-    return (_jit->pc.w);
+    return (jnc(i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -3503,8 +3803,7 @@ _bxsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
     jit_int32_t                reg;
     if (can_sign_extend_int_p(i1)) {
        isubi(r0, i1);
-       jnc(i0);
-       return (_jit->pc.w);
+       return (jnc(i0));
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
     }
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     movi(rn(reg), i1);
@@ -3523,35 +3822,39 @@ _callr(jit_state_t *_jit, jit_int32_t r0)
 static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
 static jit_word_t
 _calli(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
     jit_word_t         w;
     jit_word_t         w;
+    jit_word_t         d;
+    jit_word_t         l = _jit->pc.w + 5;
+    d = i0 - l;
 #if __X64
 #if __X64
-    w = i0 - (_jit->pc.w + 5);
-    if ((jit_int32_t)w == w) {
+    if (
+#  if __X64_32
+       !((d < 0) ^ (l < 0)) &&
+#  endif
+       (jit_int32_t)d == d) {
 #endif
 #endif
+       w = _jit->pc.w;
        ic(0xe8);
        ic(0xe8);
-       w = i0 - (_jit->pc.w + 4);
-       ii(w);
-       word = _jit->pc.w;
+       ii(d);
 #if __X64
     }
     else
 #if __X64
     }
     else
-       word = calli_p(i0);
+       w = calli_p(i0);
 #endif
 #endif
-    return (word);
+    return (w);
 }
 
 #if __X64
 static jit_word_t
 _calli_p(jit_state_t *_jit, jit_word_t i0)
 {
 }
 
 #if __X64
 static jit_word_t
 _calli_p(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
+    jit_word_t         w;
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
-    word = movi_p(rn(reg), i0);
+    w = movi_p(rn(reg), i0);
     callr(rn(reg));
     jit_unget_reg(reg);
     callr(rn(reg));
     jit_unget_reg(reg);
-    return (word);
+    return (w);
 }
 #endif
 
 }
 #endif
 
@@ -3566,51 +3869,58 @@ _jmpr(jit_state_t *_jit, jit_int32_t r0)
 static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
 static jit_word_t
 _jmpi(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
     jit_word_t         w;
     jit_word_t         w;
+    jit_word_t         d;
+    jit_word_t         l = _jit->pc.w + 5;
+    d = i0 - l;
 #if __X64
 #if __X64
-    w = i0 - (_jit->pc.w + 5);
-    if ((jit_int32_t)w == w) {
+    if (
+#  if __X64_32
+       !((d < 0) ^ (l < 0)) &&
+#  endif
+       (jit_int32_t)d == d) {
 #endif
 #endif
+       w = _jit->pc.w;
        ic(0xe9);
        ic(0xe9);
-       w = i0 - (_jit->pc.w + 4);
-       ii(w);
-       word = _jit->pc.w;
+       ii(d);
 #if __X64
     }
     else
 #if __X64
     }
     else
-       word = jmpi_p(i0);
+       w = jmpi_p(i0);
 #endif
 #endif
-    return (word);
+    return (w);
 }
 
 #if __X64
 static jit_word_t
 _jmpi_p(jit_state_t *_jit, jit_word_t i0)
 {
 }
 
 #if __X64
 static jit_word_t
 _jmpi_p(jit_state_t *_jit, jit_word_t i0)
 {
-    jit_word_t         word;
+    jit_word_t         w;
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    word = movi_p(rn(reg), i0);
+    w = movi_p(rn(reg), i0);
     jmpr(rn(reg));
     jit_unget_reg(reg);
     jmpr(rn(reg));
     jit_unget_reg(reg);
-    return (word);
+    return (w);
 }
 #endif
 
 }
 #endif
 
-static void
+static jit_word_t
 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
 {
 _jmpsi(jit_state_t *_jit, jit_uint8_t i0)
 {
+    jit_word_t         w = _jit->pc.w;
     ic(0xeb);
     ic(i0);
     ic(0xeb);
     ic(i0);
+    return (w);
 }
 
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
 }
 
 static void
 _prolog(jit_state_t *_jit, jit_node_t *node)
 {
-    jit_int32_t                reg;
+    jit_int32_t                reg, offs;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
     if (_jitc->function->define_frame || _jitc->function->assume_frame) {
        jit_int32_t     frame = -_jitc->function->frame;
+       jit_check_frame();
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
        assert(_jitc->function->self.aoff >= frame);
        if (_jitc->function->assume_frame)
            return;
@@ -3623,76 +3933,51 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                                (_jitc->function->self.alen > 32 ?
                                 _jitc->function->self.alen : 32) -
                                /* align stack at 16 bytes */
                                (_jitc->function->self.alen > 32 ?
                                 _jitc->function->self.alen : 32) -
                                /* align stack at 16 bytes */
-                               _jitc->function->self.aoff) + 15) & -16) +
-       stack_adjust;
+                               _jitc->function->self.aoff) + 15) & -16);
 #else
     _jitc->function->stack = (((_jitc->function->self.alen -
 #else
     _jitc->function->stack = (((_jitc->function->self.alen -
-                              _jitc->function->self.aoff) + 15) & -16) +
-       stack_adjust;
+                              _jitc->function->self.aoff) + 15) & -16);
 #endif
 #endif
-    subi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+    if (_jitc->function->stack)
+       _jitc->function->need_stack = 1;
+
+    if (!_jitc->function->need_frame && !_jitc->function->need_stack) {
+       /* check if any callee save register needs to be saved */
+       for (reg = 0; reg < _jitc->reglen; ++reg)
+           if (jit_regset_tstbit(&_jitc->function->regset, reg) &&
+               (_rvs[reg].spec & jit_class_sav)) {
+               _jitc->function->need_stack = 1;
+               break;
+           }
+    }
+
+    if (_jitc->function->need_frame || _jitc->function->need_stack)
+       subi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
     /* callee save registers */
     /* callee save registers */
-#if __X32
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       stxi(12, _RSP_REGNO, _RDI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       stxi( 8, _RSP_REGNO, _RSI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       stxi( 4, _RSP_REGNO, _RBX_REGNO);
-#else
-#  if __CYGWIN__ || _WIN32
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
-       sse_stxi_d(136, _RSP_REGNO, _XMM15_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
-       sse_stxi_d(128, _RSP_REGNO, _XMM14_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
-       sse_stxi_d(120, _RSP_REGNO, _XMM13_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
-       sse_stxi_d(112, _RSP_REGNO, _XMM12_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
-       sse_stxi_d(104, _RSP_REGNO, _XMM11_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
-       sse_stxi_d(96, _RSP_REGNO, _XMM10_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
-       sse_stxi_d(88, _RSP_REGNO, _XMM9_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
-       sse_stxi_d(80, _RSP_REGNO, _XMM8_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
-       sse_stxi_d(72, _RSP_REGNO, _XMM7_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
-       sse_stxi_d(64, _RSP_REGNO, _XMM6_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       stxi(56, _RSP_REGNO, _R15_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       stxi(48, _RSP_REGNO, _R14_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       stxi(40, _RSP_REGNO, _R13_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       stxi(32, _RSP_REGNO, _R12_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       stxi(24, _RSP_REGNO, _RSI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       stxi(16, _RSP_REGNO, _RDI_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       stxi( 8, _RSP_REGNO, _RBX_REGNO);
-#  else
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       stxi(40, _RSP_REGNO, _RBX_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       stxi(32, _RSP_REGNO, _R12_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       stxi(24, _RSP_REGNO, _R13_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       stxi(16, _RSP_REGNO, _R14_REGNO);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       stxi( 8, _RSP_REGNO, _R15_REGNO);
-#  endif
+    for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           stxi(offs, _RSP_REGNO, rn(iregs[reg]));
+           offs += REAL_WORDSIZE;
+       }
+    }
+#if __X64 && (__CYGWIN__ || _WIN32)
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           sse_stxi_d(offs, _RSP_REGNO, rn(fregs[reg]));
+           offs += sizeof(jit_float64_t);
+       }
+    }
 #endif
 #endif
-    stxi(0, _RSP_REGNO, _RBP_REGNO);
-    movr(_RBP_REGNO, _RSP_REGNO);
+
+    if (_jitc->function->need_frame) {
+       stxi(0, _RSP_REGNO, _RBP_REGNO);
+       movr(_RBP_REGNO, _RSP_REGNO);
+    }
 
     /* alloca */
 
     /* alloca */
-    subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
+    if (_jitc->function->stack)
+       subi(_RSP_REGNO, _RSP_REGNO, _jitc->function->stack);
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), _jitc->function->self.aoff);
     if (_jitc->function->allocar) {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), _jitc->function->self.aoff);
@@ -3716,8 +4001,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
            /* test %al, %al */
            ic(0x84);
            ic(0xc0);
            /* test %al, %al */
            ic(0x84);
            ic(0xc0);
-           jes(0);
-           nofp_code = _jit->pc.w;
+           nofp_code = jes(0);
 
            /* Save fp registers in the save area, if any is a vararg */
            /* Note that the full 16 byte xmm is not saved, because
 
            /* Save fp registers in the save area, if any is a vararg */
            /* Note that the full 16 byte xmm is not saved, because
@@ -3728,7 +4012,7 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
                sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
                           reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
 
                sse_stxi_d(_jitc->function->vaoff + first_fp_offset +
                           reg * va_fp_increment, _RBP_REGNO, rn(_XMM0 - reg));
 
-           patch_rel_char(nofp_code, _jit->pc.w);
+           patch_at(nofp_code, _jit->pc.w);
        }
     }
 #endif
        }
     }
 #endif
@@ -3737,68 +4021,38 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
 static void
 _epilog(jit_state_t *_jit, jit_node_t *node)
 {
+    jit_int32_t                reg, offs;
     if (_jitc->function->assume_frame)
        return;
     if (_jitc->function->assume_frame)
        return;
+    if (_jitc->function->need_frame)
+       movr(_RSP_REGNO, _RBP_REGNO);
+
     /* callee save registers */
     /* callee save registers */
-    movr(_RSP_REGNO, _RBP_REGNO);
-#if __X32
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       ldxi(_RDI_REGNO, _RSP_REGNO, 12);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       ldxi(_RSI_REGNO, _RSP_REGNO,  8);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       ldxi(_RBX_REGNO, _RSP_REGNO,  4);
-#else
-#  if __CYGWIN__ || _WIN32
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM15))
-       sse_ldxi_d(_XMM15_REGNO, _RSP_REGNO, 136);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM14))
-       sse_ldxi_d(_XMM14_REGNO, _RSP_REGNO, 128);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM13))
-       sse_ldxi_d(_XMM13_REGNO, _RSP_REGNO, 120);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM12))
-       sse_ldxi_d(_XMM12_REGNO, _RSP_REGNO, 112);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM11))
-       sse_ldxi_d(_XMM11_REGNO, _RSP_REGNO, 104);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM10))
-       sse_ldxi_d(_XMM10_REGNO, _RSP_REGNO, 96);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM9))
-       sse_ldxi_d(_XMM9_REGNO, _RSP_REGNO, 88);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM8))
-       sse_ldxi_d(_XMM8_REGNO, _RSP_REGNO, 80);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM7))
-       sse_ldxi_d(_XMM7_REGNO, _RSP_REGNO, 72);
-    if (jit_regset_tstbit(&_jitc->function->regset, _XMM6))
-       sse_ldxi_d(_XMM6_REGNO, _RSP_REGNO, 64);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       ldxi(_R15_REGNO, _RSP_REGNO, 56);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       ldxi(_R14_REGNO, _RSP_REGNO, 48);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       ldxi(_R13_REGNO, _RSP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       ldxi(_R12_REGNO, _RSP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RSI))
-       ldxi(_RSI_REGNO, _RSP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RDI))
-       ldxi(_RDI_REGNO, _RSP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       ldxi(_RBX_REGNO, _RSP_REGNO,  8);
-#  else
-    if (jit_regset_tstbit(&_jitc->function->regset, _RBX))
-       ldxi(_RBX_REGNO, _RSP_REGNO, 40);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R12))
-       ldxi(_R12_REGNO, _RSP_REGNO, 32);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R13))
-       ldxi(_R13_REGNO, _RSP_REGNO, 24);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R14))
-       ldxi(_R14_REGNO, _RSP_REGNO, 16);
-    if (jit_regset_tstbit(&_jitc->function->regset, _R15))
-       ldxi(_R15_REGNO, _RSP_REGNO,  8);
-#  endif
+    for (reg = 0, offs = REAL_WORDSIZE; reg < jit_size(iregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg])) {
+           ldxi(rn(iregs[reg]), _RSP_REGNO, offs);
+           offs += REAL_WORDSIZE;
+       }
+    }
+#if __X64 && (__CYGWIN__ || _WIN32)
+    for (reg = 0; reg < jit_size(fregs); reg++) {
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg])) {
+           sse_ldxi_d(rn(fregs[reg]), _RSP_REGNO, offs);
+           offs += sizeof(jit_float64_t);
+       }
+    }
 #endif
 #endif
-    ldxi(_RBP_REGNO, _RSP_REGNO, 0);
-    addi(_RSP_REGNO, _RSP_REGNO, stack_framesize - REAL_WORDSIZE);
+
+    if (_jitc->function->need_frame) {
+       ldxi(_RBP_REGNO, _RSP_REGNO, 0);
+       addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
+    }
+    /* This condition does not happen as much as expected because
+     * it is not safe to not create a frame pointer if any function
+     * is called, even jit functions, as those might call external
+     * functions. */
+    else if (_jitc->function->need_stack)
+       addi(_RSP_REGNO, _RSP_REGNO, jit_framesize());
 
     ic(0xc3);
 }
 
     ic(0xc3);
 }
@@ -3808,7 +4062,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
 {
 #if __X32 || __CYGWIN__ || _WIN32
     assert(_jitc->function->self.call & jit_call_varargs);
 {
 #if __X32 || __CYGWIN__ || _WIN32
     assert(_jitc->function->self.call & jit_call_varargs);
-    addi(r0, _RBP_REGNO, _jitc->function->self.size);
+    addi(r0, _RBP_REGNO, jit_selfsize());
 #else
     jit_int32_t                reg;
 
 #else
     jit_int32_t                reg;
 
@@ -3827,7 +4081,7 @@ _vastart(jit_state_t *_jit, jit_int32_t r0)
     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
 
     /* Initialize overflow pointer to the first stack argument. */
     stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
 
     /* Initialize overflow pointer to the first stack argument. */
-    addi(rn(reg), _RBP_REGNO, _jitc->function->self.size);
+    addi(rn(reg), _RBP_REGNO, jit_selfsize());
     stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
 
     /* Initialize register save area pointer. */
     stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
 
     /* Initialize register save area pointer. */
@@ -3861,8 +4115,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 
     /* Jump over if there are no remaining arguments in the save area. */
     icmpi(rn(rg0), va_gp_max_offset);
 
     /* Jump over if there are no remaining arguments in the save area. */
     icmpi(rn(rg0), va_gp_max_offset);
-    jaes(0);
-    ge_code = _jit->pc.w;
+    ge_code = jaes(0);
 
     /* Load the save area pointer in the second temporary. */
     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
 
     /* Load the save area pointer in the second temporary. */
     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
@@ -3878,11 +4131,10 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    jmpsi(0);
-    lt_code = _jit->pc.w;
+    lt_code = jmpsi(0);
 
     /* Where to land if argument is in overflow area. */
 
     /* Where to land if argument is in overflow area. */
-    patch_rel_char(ge_code, _jit->pc.w);
+    patch_at(ge_code, _jit->pc.w);
 
     /* Load overflow pointer. */
     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
 
     /* Load overflow pointer. */
     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
@@ -3895,7 +4147,7 @@ _vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
 
     /* Where to land if argument is in save area. */
     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
 
     /* Where to land if argument is in save area. */
-    patch_rel_char(lt_code, _jit->pc.w);
+    patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
 #endif
 
     jit_unget_reg(rg0);
 #endif
@@ -3929,8 +4181,7 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
 
     /* Jump over if there are no remaining arguments in the save area. */
     icmpi(rn(rg0), va_fp_max_offset);
 
     /* Jump over if there are no remaining arguments in the save area. */
     icmpi(rn(rg0), va_fp_max_offset);
-    jaes(0);
-    ge_code = _jit->pc.w;
+    ge_code = jaes(0);
 
     /* Load the save area pointer in the second temporary. */
     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
 
     /* Load the save area pointer in the second temporary. */
     ldxi(rn(rg1), r1, offsetof(jit_va_list_t, save));
@@ -3949,11 +4200,10 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
     jit_unget_reg(rg1);
 
     /* Jump over overflow code. */
-    jmpsi(0);
-    lt_code = _jit->pc.w;
+    lt_code = jmpsi(0);
 
     /* Where to land if argument is in overflow area. */
 
     /* Where to land if argument is in overflow area. */
-    patch_rel_char(ge_code, _jit->pc.w);
+    patch_at(ge_code, _jit->pc.w);
 
     /* Load overflow pointer. */
     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
 
     /* Load overflow pointer. */
     ldxi(rn(rg0), r1, offsetof(jit_va_list_t, over));
@@ -3969,27 +4219,57 @@ _vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_bool_t x87)
     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
 
     /* Where to land if argument is in save area. */
     stxi(offsetof(jit_va_list_t, over), r1, rn(rg0));
 
     /* Where to land if argument is in save area. */
-    patch_rel_char(lt_code, _jit->pc.w);
+    patch_at(lt_code, _jit->pc.w);
 
     jit_unget_reg(rg0);
 #endif
 }
 
 static void
 
     jit_unget_reg(rg0);
 #endif
 }
 
 static void
-_patch_at(jit_state_t *_jit, jit_node_t *node,
-         jit_word_t instr, jit_word_t label)
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
 {
 {
-    switch (node->code) {
-#  if __X64
-       case jit_code_calli:
-       case jit_code_jmpi:
-#  endif
-       case jit_code_movi:
-           patch_abs(instr, label);
+    jit_word_t          disp;
+    jit_uint8_t                *code = (jit_uint8_t *)instr;
+    ++instr;
+    switch (code[0]) {
+       /* movi_p */
+       case 0xb8 ... 0xbf:
+           *(jit_word_t *)instr = label;
            break;
            break;
-       default:
-           patch_rel(instr, label);
+           /* forward pc relative address known to be in range */
+#if CAN_RIP_ADDRESS
+       /* movi */
+       case 0x8d:
+           ++instr;
+           goto apply;
+#endif
+       /* jcc */
+       case 0x0f:
+           ++instr;
+           if (code[1] < 0x80 || code[1] > 0x8f)
+               goto fail;
+       /* calli */
+       case 0xe8:
+       /* jmpi */
+       case 0xe9:
+#if CAN_RIP_ADDRESS
+       apply:
+#endif
+           disp = label - (instr + 4);
+           assert((jit_int32_t)disp == disp);
+           *(jit_int32_t *)instr = disp;
+           break;
+           /* jccs */
+       case 0x70 ... 0x7f:
+           /* jmpsi */
+       case 0xeb:
+           disp = label - (instr + 1);
+           assert((jit_int8_t)disp == disp);
+           *(jit_int8_t *)instr = disp;
            break;
            break;
+       default:
+       fail:
+           abort();
     }
 }
 #endif
     }
 }
 #endif
index 4447a52..c3ac895 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
  */
 
 #if PROTO
  */
 
 #if PROTO
-#  if __X32
-#    define sse_address_p(i0)          1
-#  else
-#    if __X64_32
-#      define sse_address_p(i0)                ((jit_word_t)(i0) >= 0)
-#    else
-#      define sse_address_p(i0)                can_sign_extend_int_p(i0)
-#    endif
-#  endif
 #  define _XMM6_REGNO                  6
 #  define _XMM7_REGNO                  7
 #  define _XMM8_REGNO                  8
 #  define _XMM6_REGNO                  6
 #  define _XMM7_REGNO                  7
 #  define _XMM8_REGNO                  8
@@ -470,14 +461,14 @@ _sse_b##name##i_##type(jit_state_t *_jit,                         \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
 {                                                                      \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
 {                                                                      \
-    jit_word_t         word;                                           \
+    jit_word_t         w;                                              \
     jit_int32_t                reg = jit_get_reg(jit_class_fpr|jit_class_xpr|  \
                                          jit_class_nospill);           \
     assert(jit_sse_reg_p(reg));                                                \
     sse_movi_##type(rn(reg), i1);                                      \
     jit_int32_t                reg = jit_get_reg(jit_class_fpr|jit_class_xpr|  \
                                          jit_class_nospill);           \
     assert(jit_sse_reg_p(reg));                                                \
     sse_movi_##type(rn(reg), i1);                                      \
-    word = sse_b##name##r_##type(i0, r0, rn(reg));                     \
+    w = sse_b##name##r_##type(i0, r0, rn(reg));                                \
     jit_unget_reg(reg);                                                        \
     jit_unget_reg(reg);                                                        \
-    return (word);                                                     \
+    return (w);                                                                \
 }
 #  define fopi(name)                   fpr_opi(name, f, 32)
 #  define fbopi(name)                  fpr_bopi(name, f, 32)
 }
 #  define fopi(name)                   fpr_opi(name, f, 32)
 #  define fbopi(name)                  fpr_bopi(name, f, 32)
@@ -809,8 +800,17 @@ _sse_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
        ldi = !_jitc->no_data;
 #if __X64
        /* if will allocate a register for offset, just use immediate */
        ldi = !_jitc->no_data;
 #if __X64
        /* if will allocate a register for offset, just use immediate */
-       if (ldi && !sse_address_p(i0))
+#  if CAN_RIP_ADDRESS
+       if (ldi) {
+           jit_word_t  rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+           ldi = can_sign_extend_int_p(rel);
+           if (!ldi && address_p(i0))
+               ldi = 1;
+       }
+#  else
+       if (ldi && !address_p(i0))
            ldi = 0;
            ldi = 0;
+#  endif
 #endif
        if (ldi)
            sse_ldi_f(r0, (jit_word_t)i0);
 #endif
        if (ldi)
            sse_ldi_f(r0, (jit_word_t)i0);
@@ -840,10 +840,9 @@ _sse_eqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     ixorr(reg, reg);
     ucomissr(r2, r1);
     }
     ixorr(reg, reg);
     ucomissr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_E, reg);
     cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
     if (!rc)
        xchgr(r0, reg);
 }
@@ -866,10 +865,9 @@ _sse_ner_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     imovi(reg, 1);
     ucomissr(r2, r1);
     }
     imovi(reg, 1);
     ucomissr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_NE, reg);
     cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
     if (!rc)
        xchgr(r0, reg);
 }
@@ -928,7 +926,13 @@ static void
 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movssmr(rel, _NOREG, _NOREG, _SCL8, r0);
+    else
+#endif
+    if (address_p(i0))
        movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movssmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -975,7 +979,13 @@ static void
 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movssrm(r0, rel, _NOREG, _NOREG, _SCL8);
+    else
+#endif
+    if (address_p(i0))
        movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movssrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1022,8 +1032,7 @@ static jit_word_t
 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
 _sse_bltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 fbopi(lt)
 
 }
 fbopi(lt)
 
@@ -1031,21 +1040,20 @@ static jit_word_t
 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
 _sse_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 fbopi(le)
 
 static jit_word_t
 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 fbopi(le)
 
 static jit_word_t
 _sse_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     ucomissr(r0, r1);
     jit_word_t         jp_code;
     ucomissr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    je(i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    w = je(i0);
+    patch_at(jp_code, _jit->pc.w);
+    return (w);
 }
 fbopi(eq)
 
 }
 fbopi(eq)
 
@@ -1053,8 +1061,7 @@ static jit_word_t
 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
 _sse_bger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 fbopi(ge)
 
 }
 fbopi(ge)
 
@@ -1062,25 +1069,23 @@ static jit_word_t
 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
 _sse_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 fbopi(gt)
 
 static jit_word_t
 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 fbopi(gt)
 
 static jit_word_t
 _sse_bner_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     jit_word_t         jz_code;
     ucomissr(r0, r1);
     jit_word_t         jp_code;
     jit_word_t         jz_code;
     ucomissr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    jz_code = jzs(0);
+    patch_at(jp_code, _jit->pc.w);
+    w = jmpi(i0);
+    patch_at(jz_code, _jit->pc.w);
+    return (w);
 }
 fbopi(ne)
 
 }
 fbopi(ne)
 
@@ -1088,47 +1093,49 @@ static jit_word_t
 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
 _sse_bunltr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 fbopi(unlt)
 
 static jit_word_t
 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 fbopi(unlt)
 
 static jit_word_t
 _sse_bunler_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomissr(r0, r1);
     else {
        ucomissr(r0, r1);
-       jna(i0);
+       w = jna(i0);
     }
     }
-    return (_jit->pc.w);
+    return (w);
 }
 fbopi(unle)
 
 static jit_word_t
 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 fbopi(unle)
 
 static jit_word_t
 _sse_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomissr(r0, r1);
     else {
        ucomissr(r0, r1);
-       je(i0);
+       w = je(i0);
     }
     }
-    return (_jit->pc.w);
+    return (w);
 }
 fbopi(uneq)
 
 static jit_word_t
 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 fbopi(uneq)
 
 static jit_word_t
 _sse_bunger_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomissr(r1, r0);
     else {
        ucomissr(r1, r0);
-       jna(i0);
+       w = jna(i0);
     }
     }
-    return (_jit->pc.w);
+    return (w);
 }
 fbopi(unge)
 
 }
 fbopi(unge)
 
@@ -1136,8 +1143,7 @@ static jit_word_t
 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
 _sse_bungtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r1, r0);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 fbopi(ungt)
 
 }
 fbopi(ungt)
 
@@ -1145,8 +1151,7 @@ static jit_word_t
 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
 _sse_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jne(i0);
-    return (_jit->pc.w);
+    return (jne(i0));
 }
 fbopi(ltgt)
 
 }
 fbopi(ltgt)
 
@@ -1154,8 +1159,7 @@ static jit_word_t
 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
 _sse_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jnp(i0);
-    return (_jit->pc.w);
+    return (jnp(i0));
 }
 fbopi(ord)
 
 }
 fbopi(ord)
 
@@ -1163,8 +1167,7 @@ static jit_word_t
 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
 _sse_bunordr_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomissr(r0, r1);
-    jp(i0);
-    return (_jit->pc.w);
+    return (jp(i0));
 }
 fbopi(unord)
 
 }
 fbopi(unord)
 
@@ -1185,10 +1188,9 @@ _sse_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     ixorr(reg, reg);
     ucomisdr(r2, r1);
     }
     ixorr(reg, reg);
     ucomisdr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_E, reg);
     cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1211,10 +1213,9 @@ _sse_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     }
     imovi(reg, 1);
     ucomisdr(r2, r1);
     }
     imovi(reg, 1);
     ucomisdr(r2, r1);
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_NE, reg);
     cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1294,8 +1295,17 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
        ldi = !_jitc->no_data;
 #if __X64
        /* if will allocate a register for offset, just use immediate */
        ldi = !_jitc->no_data;
 #if __X64
        /* if will allocate a register for offset, just use immediate */
-       if (ldi && !sse_address_p(i0))
+#  if CAN_RIP_ADDRESS
+       if (ldi) {
+           jit_word_t  rel = (jit_word_t)i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+           ldi = can_sign_extend_int_p(rel);
+           if (!ldi && address_p(i0))
+               ldi = 1;
+       }
+#  else
+       if (ldi && !address_p(i0))
            ldi = 0;
            ldi = 0;
+#  endif
 #endif
        if (ldi)
            sse_ldi_d(r0, (jit_word_t)i0);
 #endif
        if (ldi)
            sse_ldi_d(r0, (jit_word_t)i0);
@@ -1306,6 +1316,7 @@ _sse_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
            movdqxr(r0, rn(reg));
            jit_unget_reg(reg);
 #else
            movdqxr(r0, rn(reg));
            jit_unget_reg(reg);
 #else
+           CHECK_CVT_OFFSET();
            movi(rn(reg), data.ii[0]);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
            movi(rn(reg), data.ii[1]);
            movi(rn(reg), data.ii[0]);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
            movi(rn(reg), data.ii[1]);
@@ -1321,7 +1332,13 @@ static void
 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
 _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movsdmr(rel, _NOREG, _NOREG, _SCL8, r0);
+    else
+#endif
+    if (address_p(i0))
        movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movsdmr(i0, _NOREG, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1368,7 +1385,13 @@ static void
 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
 _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
-    if (sse_address_p(i0))
+#if CAN_RIP_ADDRESS
+    jit_word_t         rel = i0 - (_jit->pc.w + 8 + !!(r0 & 8));
+    if (can_sign_extend_int_p(rel))
+       movsdrm(r0, rel, _NOREG, _NOREG, _SCL8);
+    else
+#endif
+    if (address_p(i0))
        movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
        movsdrm(r0, i0, _NOREG, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -1415,8 +1438,7 @@ static jit_word_t
 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
 _sse_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 dbopi(lt)
 
 }
 dbopi(lt)
 
@@ -1424,21 +1446,20 @@ static jit_word_t
 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
 _sse_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 dbopi(le)
 
 static jit_word_t
 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 dbopi(le)
 
 static jit_word_t
 _sse_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     ucomisdr(r0, r1);
     jit_word_t         jp_code;
     ucomisdr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    je(i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    w = je(i0);
+    patch_at(jp_code, _jit->pc.w);
+    return (w);
 }
 dbopi(eq)
 
 }
 dbopi(eq)
 
@@ -1446,8 +1467,7 @@ static jit_word_t
 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
 _sse_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jae(i0);
-    return (_jit->pc.w);
+    return (jae(i0));
 }
 dbopi(ge)
 
 }
 dbopi(ge)
 
@@ -1455,25 +1475,23 @@ static jit_word_t
 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
 _sse_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    ja(i0);
-    return (_jit->pc.w);
+    return (ja(i0));
 }
 dbopi(gt)
 
 static jit_word_t
 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 dbopi(gt)
 
 static jit_word_t
 _sse_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     jit_word_t         jp_code;
     jit_word_t         jz_code;
     ucomisdr(r0, r1);
     jit_word_t         jp_code;
     jit_word_t         jz_code;
     ucomisdr(r0, r1);
-    jps(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jps(0);
+    jz_code = jzs(0);
+    patch_at(jp_code, _jit->pc.w);
+    w = jmpi(i0);
+    patch_at(jz_code, _jit->pc.w);
+    return (w);
 }
 dbopi(ne)
 
 }
 dbopi(ne)
 
@@ -1481,47 +1499,49 @@ static jit_word_t
 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
 _sse_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 dbopi(unlt)
 
 static jit_word_t
 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 dbopi(unlt)
 
 static jit_word_t
 _sse_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomisdr(r0, r1);
     else {
        ucomisdr(r0, r1);
-       jna(i0);
+       w = jna(i0);
     }
     }
-    return (_jit->pc.w);
+    return (w);
 }
 dbopi(unle)
 
 static jit_word_t
 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 dbopi(unle)
 
 static jit_word_t
 _sse_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomisdr(r0, r1);
     else {
        ucomisdr(r0, r1);
-       je(i0);
+       w = je(i0);
     }
     }
-    return (_jit->pc.w);
+    return (w);
 }
 dbopi(uneq)
 
 static jit_word_t
 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 }
 dbopi(uneq)
 
 static jit_word_t
 _sse_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t         w;
     if (r0 == r1)
     if (r0 == r1)
-       jmpi(i0);
+       w = jmpi(i0);
     else {
        ucomisdr(r1, r0);
     else {
        ucomisdr(r1, r0);
-       jna(i0);
+       w = jna(i0);
     }
     }
-    return (_jit->pc.w);
+    return (w);
 }
 dbopi(unge)
 
 }
 dbopi(unge)
 
@@ -1529,8 +1549,7 @@ static jit_word_t
 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
 _sse_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r1, r0);
-    jnae(i0);
-    return (_jit->pc.w);
+    return (jnae(i0));
 }
 dbopi(ungt)
 
 }
 dbopi(ungt)
 
@@ -1538,8 +1557,7 @@ static jit_word_t
 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
 _sse_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jne(i0);
-    return (_jit->pc.w);
+    return (jne(i0));
 }
 dbopi(ltgt)
 
 }
 dbopi(ltgt)
 
@@ -1547,8 +1565,7 @@ static jit_word_t
 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
 _sse_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jnp(i0);
-    return (_jit->pc.w);
+    return (jnp(i0));
 }
 dbopi(ord)
 
 }
 dbopi(ord)
 
@@ -1556,8 +1573,7 @@ static jit_word_t
 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
 _sse_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     ucomisdr(r0, r1);
-    jp(i0);
-    return (_jit->pc.w);
+    return (jp(i0));
 }
 dbopi(unord)
 #  undef fopi
 }
 dbopi(unord)
 #  undef fopi
index eb668b3..5c4515a 100644 (file)
@@ -3,9 +3,10 @@
 #define JIT_INSTR_MAX 42
     0, /* data */
     0, /* live */
 #define JIT_INSTR_MAX 42
     0, /* data */
     0, /* live */
-    3, /* align */
+    11,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     3, /* label */
     0, /* #name */
     0, /* #note */
     3, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     3, /* va_start */
     5, /* va_arg */
     7, /* va_arg_d */
     3, /* va_start */
     5, /* va_arg */
     7, /* va_arg_d */
@@ -36,9 +52,9 @@
     5, /* addxi */
     4, /* subr */
     6, /* subi */
     5, /* addxi */
     4, /* subr */
     6, /* subi */
-    6, /* subcr */
+    12,        /* subcr */
     6, /* subci */
     6, /* subci */
-    6, /* subxr */
+    12,        /* subxr */
     5, /* subxi */
     8, /* rsbi */
     5, /* mulr */
     5, /* subxi */
     8, /* rsbi */
     5, /* mulr */
@@ -52,9 +68,9 @@
     22,        /* divr_u */
     25,        /* divi_u */
     23,        /* qdivr */
     22,        /* divr_u */
     25,        /* divi_u */
     23,        /* qdivr */
-    26,        /* qdivi */
+    28,        /* qdivi */
     24,        /* qdivr_u */
     24,        /* qdivr_u */
-    27,        /* qdivi_u */
+    29,        /* qdivi_u */
     21,        /* remr */
     24,        /* remi */
     22,        /* remr_u */
     21,        /* remr */
     24,        /* remi */
     22,        /* remr_u */
     5, /* movi */
     5, /* movnr */
     5, /* movzr */
     5, /* movi */
     5, /* movnr */
     5, /* movzr */
+    9, /* casr */
+    13,        /* casi */
     11,        /* extr_c */
     11,        /* extr_uc */
     3, /* extr_s */
     3, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     11,        /* extr_c */
     11,        /* extr_uc */
     3, /* extr_s */
     3, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    7, /* bswapr_us */
+    4, /* bswapr_ui */
+    0, /* bswapr_ul */
     7, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     7, /* htonr_us */
     4, /* htonr_ui */
     0, /* htonr_ul */
     2, /* callr */
     5, /* calli */
     0, /* prepare */
     2, /* callr */
     5, /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     4, /* extr_d */
     4, /* extr_f_d */
     10,        /* movr_d */
     4, /* extr_d */
     4, /* extr_f_d */
     10,        /* movr_d */
-    24,        /* movi_d */
+    33,        /* movi_d */
     4, /* ldr_d */
     8, /* ldi_d */
     5, /* ldxr_d */
     4, /* ldr_d */
     8, /* ldi_d */
     5, /* ldxr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    7, /* bswapr_us */
-    4, /* bswapr_ui */
-    0, /* bswapr_ul */
-    9, /* casr */
-    13,        /* casi */
-#endif
+    21,        /* clo */
+    17,        /* clz */
+    15,        /* cto */
+    11,        /* ctz */
+#endif /* __X32 */
 
 #if __X64
 #if __CYGWIN__ || _WIN32
 #define JIT_INSTR_MAX 130
     0, /* data */
     0, /* live */
 
 #if __X64
 #if __CYGWIN__ || _WIN32
 #define JIT_INSTR_MAX 130
     0, /* data */
     0, /* live */
-    6, /* align */
+    27,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     7, /* label */
     0, /* #name */
     0, /* #note */
     7, /* label */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     7, /* va_start */
     7, /* va_arg */
     9, /* va_arg_d */
     7, /* va_start */
     7, /* va_arg */
     9, /* va_arg_d */
     10,        /* movi */
     7, /* movnr */
     7, /* movzr */
     10,        /* movi */
     7, /* movnr */
     7, /* movzr */
+    11,        /* casr */
+    21,        /* casi */
     7, /* extr_c */
     7, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     3, /* extr_i */
     3, /* extr_ui */
     7, /* extr_c */
     7, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     3, /* extr_i */
     3, /* extr_ui */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    6, /* bswapr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     6, /* htonr_ul */
     4, /* ldr_c */
     9, /* htonr_us */
     6, /* htonr_ui */
     6, /* htonr_ul */
     4, /* ldr_c */
-    15,        /* ldi_c */
+    14,        /* ldi_c */
     4, /* ldr_uc */
     4, /* ldr_uc */
-    15,        /* ldi_uc */
+    14,        /* ldi_uc */
     4, /* ldr_s */
     4, /* ldr_s */
-    15,        /* ldi_s */
+    14,        /* ldi_s */
     4, /* ldr_us */
     4, /* ldr_us */
-    15,        /* ldi_us */
+    14,        /* ldi_us */
     3, /* ldr_i */
     3, /* ldr_i */
-    14,        /* ldi_i */
+    13,        /* ldi_i */
     3, /* ldr_ui */
     3, /* ldr_ui */
-    14,        /* ldi_ui */
+    13,        /* ldi_ui */
     3, /* ldr_l */
     3, /* ldr_l */
-    14,        /* ldi_l */
+    13,        /* ldi_l */
     5, /* ldxr_c */
     8, /* ldxi_c */
     5, /* ldxr_uc */
     5, /* ldxr_c */
     8, /* ldxi_c */
     5, /* ldxr_uc */
     4, /* ldxr_l */
     7, /* ldxi_l */
     6, /* str_c */
     4, /* ldxr_l */
     7, /* ldxi_l */
     6, /* str_c */
-    17,        /* sti_c */
+    16,        /* sti_c */
     4, /* str_s */
     4, /* str_s */
-    15,        /* sti_s */
+    14,        /* sti_s */
     3, /* str_i */
     3, /* str_i */
-    14,        /* sti_i */
+    13,        /* sti_i */
     3, /* str_l */
     3, /* str_l */
-    14,        /* sti_l */
+    13,        /* sti_l */
     7, /* stxr_c */
     7, /* stxi_c */
     5, /* stxr_s */
     7, /* stxr_c */
     7, /* stxi_c */
     5, /* stxr_s */
     10,        /* bxsubi */
     9, /* bxsubr_u */
     10,        /* bxsubi_u */
     10,        /* bxsubi */
     9, /* bxsubr_u */
     10,        /* bxsubi_u */
-    3, /* jmpr */
+    2, /* jmpr */
     5, /* jmpi */
     5, /* jmpi */
-    3, /* callr */
-    13,        /* calli */
+    2, /* callr */
+    20,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* putargr_f */
     0, /* putargi_f */
     10,        /* addr_f */
     0, /* putargr_f */
     0, /* putargi_f */
     10,        /* addr_f */
-    21,        /* addi_f */
+    19,        /* addi_f */
     15,        /* subr_f */
     15,        /* subr_f */
-    21,        /* subi_f */
-    27,        /* rsbi_f */
+    19,        /* subi_f */
+    26,        /* rsbi_f */
     10,        /* mulr_f */
     10,        /* mulr_f */
-    21,        /* muli_f */
+    19,        /* muli_f */
     15,        /* divr_f */
     15,        /* divr_f */
-    21,        /* divi_f */
-    15,        /* negr_f */
+    19,        /* divi_f */
+    14,        /* negr_f */
     15,        /* absr_f */
     5, /* sqrtr_f */
     16,        /* ltr_f */
     15,        /* absr_f */
     5, /* sqrtr_f */
     16,        /* ltr_f */
-    31,        /* lti_f */
+    30,        /* lti_f */
     16,        /* ler_f */
     16,        /* ler_f */
-    31,        /* lei_f */
+    30,        /* lei_f */
     18,        /* eqr_f */
     18,        /* eqr_f */
-    33,        /* eqi_f */
+    32,        /* eqi_f */
     16,        /* ger_f */
     16,        /* ger_f */
-    31,        /* gei_f */
+    30,        /* gei_f */
     16,        /* gtr_f */
     16,        /* gtr_f */
-    31,        /* gti_f */
+    30,        /* gti_f */
     20,        /* ner_f */
     20,        /* ner_f */
-    35,        /* nei_f */
+    34,        /* nei_f */
     16,        /* unltr_f */
     16,        /* unltr_f */
-    31,        /* unlti_f */
+    30,        /* unlti_f */
     16,        /* unler_f */
     16,        /* unler_f */
-    31,        /* unlei_f */
+    30,        /* unlei_f */
     16,        /* uneqr_f */
     16,        /* uneqr_f */
-    31,        /* uneqi_f */
+    30,        /* uneqi_f */
     16,        /* unger_f */
     16,        /* unger_f */
-    31,        /* ungei_f */
+    30,        /* ungei_f */
     16,        /* ungtr_f */
     16,        /* ungtr_f */
-    31,        /* ungti_f */
+    30,        /* ungti_f */
     16,        /* ltgtr_f */
     16,        /* ltgtr_f */
-    31,        /* ltgti_f */
+    30,        /* ltgti_f */
     16,        /* ordr_f */
     16,        /* ordr_f */
-    31,        /* ordi_f */
+    30,        /* ordi_f */
     16,        /* unordr_f */
     16,        /* unordr_f */
-    31,        /* unordi_f */
+    30,        /* unordi_f */
     5, /* truncr_f_i */
     5, /* truncr_f_l */
     5, /* extr_f */
     5, /* extr_d_f */
     5, /* movr_f */
     5, /* truncr_f_i */
     5, /* truncr_f_l */
     5, /* extr_f */
     5, /* extr_d_f */
     5, /* movr_f */
-    15,        /* movi_f */
+    18,        /* movi_f */
     5, /* ldr_f */
     5, /* ldr_f */
-    16,        /* ldi_f */
+    15,        /* ldi_f */
     6, /* ldxr_f */
     8, /* ldxi_f */
     5, /* str_f */
     6, /* ldxr_f */
     8, /* ldxi_f */
     5, /* str_f */
-    16,        /* sti_f */
+    15,        /* sti_f */
     6, /* stxr_f */
     9, /* stxi_f */
     10,        /* bltr_f */
     6, /* stxr_f */
     9, /* stxi_f */
     10,        /* bltr_f */
-    21,        /* blti_f */
+    19,        /* blti_f */
     10,        /* bler_f */
     10,        /* bler_f */
-    24,        /* blei_f */
+    23,        /* blei_f */
     12,        /* beqr_f */
     27,        /* beqi_f */
     10,        /* bger_f */
     12,        /* beqr_f */
     27,        /* beqi_f */
     10,        /* bger_f */
-    25,        /* bgei_f */
+    24,        /* bgei_f */
     10,        /* bgtr_f */
     10,        /* bgtr_f */
-    25,        /* bgti_f */
+    24,        /* bgti_f */
     13,        /* bner_f */
     13,        /* bner_f */
-    28,        /* bnei_f */
+    27,        /* bnei_f */
     10,        /* bunltr_f */
     10,        /* bunltr_f */
-    25,        /* bunlti_f */
+    24,        /* bunlti_f */
     10,        /* bunler_f */
     10,        /* bunler_f */
-    25,        /* bunlei_f */
+    24,        /* bunlei_f */
     10,        /* buneqr_f */
     10,        /* buneqr_f */
-    25,        /* buneqi_f */
+    24,        /* buneqi_f */
     10,        /* bunger_f */
     10,        /* bunger_f */
-    25,        /* bungei_f */
+    24,        /* bungei_f */
     10,        /* bungtr_f */
     10,        /* bungtr_f */
-    25,        /* bungti_f */
+    24,        /* bungti_f */
     10,        /* bltgtr_f */
     10,        /* bltgtr_f */
-    25,        /* bltgti_f */
+    24,        /* bltgti_f */
     10,        /* bordr_f */
     10,        /* bordr_f */
-    25,        /* bordi_f */
+    24,        /* bordi_f */
     10,        /* bunordr_f */
     10,        /* bunordr_f */
-    25,        /* bunordi_f */
+    24,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     25,        /* muli_d */
     15,        /* divr_d */
     25,        /* divi_d */
     25,        /* muli_d */
     15,        /* divr_d */
     25,        /* divi_d */
-    22,        /* negr_d */
+    21,        /* negr_d */
     16,        /* absr_d */
     5, /* sqrtr_d */
     17,        /* ltr_d */
     16,        /* absr_d */
     5, /* sqrtr_d */
     17,        /* ltr_d */
     5, /* extr_d */
     5, /* extr_f_d */
     5, /* movr_d */
     5, /* extr_d */
     5, /* extr_f_d */
     5, /* movr_d */
-    15,        /* movi_d */
+    29,        /* movi_d */
     5, /* ldr_d */
     5, /* ldr_d */
-    16,        /* ldi_d */
+    15,        /* ldi_d */
     6, /* ldxr_d */
     8, /* ldxi_d */
     5, /* str_d */
     6, /* ldxr_d */
     8, /* ldxi_d */
     5, /* str_d */
-    16,        /* sti_d */
+    15,        /* sti_d */
     6, /* stxr_d */
     9, /* stxi_d */
     11,        /* bltr_d */
     6, /* stxr_d */
     9, /* stxi_d */
     11,        /* bltr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    9, /* bswapr_us */
-    6, /* bswapr_ui */
-    6, /* bswapr_ul */
-    0, /* casr */
-    0, /* casi */
+    27,        /* clo */
+    21,        /* clz */
+    20,        /* cto */
+    14,        /* ctz */
 #else
 
 #  if __X64_32
 #else
 
 #  if __X64_32
-#define JIT_INSTR_MAX 108
+#define JIT_INSTR_MAX 105
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    3, /* align */
+    7, /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     3, /* label */
     0, /* #name */
     0, /* #note */
     3, /* label */
-    108,       /* prolog */
+    105,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
-    41,        /* va_start */
-    45,        /* va_arg */
-    54,        /* va_arg_d */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    33,        /* va_start */
+    43,        /* va_arg */
+    45,        /* va_arg_d */
     0, /* va_end */
     5, /* addr */
     7, /* addi */
     0, /* va_end */
     5, /* addr */
     7, /* addi */
     6, /* movi */
     7, /* movnr */
     7, /* movzr */
     6, /* movi */
     7, /* movnr */
     7, /* movzr */
+    11,        /* casr */
+    16,        /* casi */
     7, /* extr_c */
     7, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
     7, /* extr_c */
     7, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     0, /* extr_i */
     0, /* extr_ui */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    0, /* bswapr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     0, /* htonr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     0, /* htonr_ul */
     8, /* sti_i */
     0, /* str_l */
     0, /* sti_l */
     8, /* sti_i */
     0, /* str_l */
     0, /* sti_l */
-    12,        /* stxr_c */
+    11,        /* stxr_c */
     7, /* stxi_c */
     7, /* stxi_c */
-    10,        /* stxr_s */
+    9, /* stxr_s */
     7, /* stxi_s */
     7, /* stxi_s */
-    9, /* stxr_i */
+    8, /* stxr_i */
     6, /* stxi_i */
     0, /* stxr_l */
     0, /* stxi_l */
     6, /* stxi_i */
     0, /* stxr_l */
     0, /* stxi_l */
     10,        /* bxsubi_u */
     2, /* jmpr */
     5, /* jmpi */
     10,        /* bxsubi_u */
     2, /* jmpr */
     5, /* jmpi */
-    3, /* callr */
+    2, /* callr */
     9, /* calli */
     0, /* prepare */
     9, /* calli */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* putargr_f */
     0, /* putargi_f */
     10,        /* addr_f */
     0, /* putargr_f */
     0, /* putargi_f */
     10,        /* addr_f */
-    21,        /* addi_f */
+    20,        /* addi_f */
     15,        /* subr_f */
     15,        /* subr_f */
-    21,        /* subi_f */
-    26,        /* rsbi_f */
+    20,        /* subi_f */
+    25,        /* rsbi_f */
     10,        /* mulr_f */
     10,        /* mulr_f */
-    21,        /* muli_f */
+    20,        /* muli_f */
     15,        /* divr_f */
     15,        /* divr_f */
-    21,        /* divi_f */
+    20,        /* divi_f */
     15,        /* negr_f */
     15,        /* absr_f */
     5, /* sqrtr_f */
     15,        /* negr_f */
     15,        /* absr_f */
     5, /* sqrtr_f */
     11,        /* movi_f */
     6, /* ldr_f */
     10,        /* ldi_f */
     11,        /* movi_f */
     6, /* ldr_f */
     10,        /* ldi_f */
-    11,        /* ldxr_f */
+    10,        /* ldxr_f */
     9, /* ldxi_f */
     6, /* str_f */
     10,        /* sti_f */
     9, /* ldxi_f */
     6, /* str_f */
     10,        /* sti_f */
-    11,        /* stxr_f */
+    10,        /* stxr_f */
     9, /* stxi_f */
     10,        /* bltr_f */
     9, /* stxi_f */
     10,        /* bltr_f */
-    21,        /* blti_f */
+    20,        /* blti_f */
     10,        /* bler_f */
     10,        /* bler_f */
-    21,        /* blei_f */
+    20,        /* blei_f */
     12,        /* beqr_f */
     23,        /* beqi_f */
     10,        /* bger_f */
     12,        /* beqr_f */
     23,        /* beqi_f */
     10,        /* bger_f */
-    21,        /* bgei_f */
+    20,        /* bgei_f */
     10,        /* bgtr_f */
     10,        /* bgtr_f */
-    21,        /* bgti_f */
+    20,        /* bgti_f */
     13,        /* bner_f */
     13,        /* bner_f */
-    24,        /* bnei_f */
+    23,        /* bnei_f */
     10,        /* bunltr_f */
     10,        /* bunltr_f */
-    21,        /* bunlti_f */
+    20,        /* bunlti_f */
     10,        /* bunler_f */
     10,        /* bunler_f */
-    21,        /* bunlei_f */
+    20,        /* bunlei_f */
     10,        /* buneqr_f */
     10,        /* buneqr_f */
-    21,        /* buneqi_f */
+    20,        /* buneqi_f */
     10,        /* bunger_f */
     10,        /* bunger_f */
-    21,        /* bungei_f */
+    20,        /* bungei_f */
     10,        /* bungtr_f */
     10,        /* bungtr_f */
-    21,        /* bungti_f */
+    20,        /* bungti_f */
     10,        /* bltgtr_f */
     10,        /* bltgtr_f */
-    21,        /* bltgti_f */
+    20,        /* bltgti_f */
     10,        /* bordr_f */
     10,        /* bordr_f */
-    21,        /* bordi_f */
+    20,        /* bordi_f */
     10,        /* bunordr_f */
     10,        /* bunordr_f */
-    21,        /* bunordi_f */
+    20,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* putargr_d */
     0, /* putargi_d */
     10,        /* addr_d */
     0, /* putargr_d */
     0, /* putargi_d */
     10,        /* addr_d */
-    33,        /* addi_d */
+    29,        /* addi_d */
     15,        /* subr_d */
     15,        /* subr_d */
-    33,        /* subi_d */
-    38,        /* rsbi_d */
+    29,        /* subi_d */
+    34,        /* rsbi_d */
     10,        /* mulr_d */
     10,        /* mulr_d */
-    33,        /* muli_d */
+    29,        /* muli_d */
     15,        /* divr_d */
     15,        /* divr_d */
-    33,        /* divi_d */
+    29,        /* divi_d */
     22,        /* negr_d */
     16,        /* absr_d */
     5, /* sqrtr_d */
     22,        /* negr_d */
     16,        /* absr_d */
     5, /* sqrtr_d */
     23,        /* movi_d */
     6, /* ldr_d */
     10,        /* ldi_d */
     23,        /* movi_d */
     6, /* ldr_d */
     10,        /* ldi_d */
-    11,        /* ldxr_d */
+    10,        /* ldxr_d */
     9, /* ldxi_d */
     6, /* str_d */
     10,        /* sti_d */
     9, /* ldxi_d */
     6, /* str_d */
     10,        /* sti_d */
-    11,        /* stxr_d */
+    10,        /* stxr_d */
     9, /* stxi_d */
     11,        /* bltr_d */
     9, /* stxi_d */
     11,        /* bltr_d */
-    34,        /* blti_d */
+    30,        /* blti_d */
     11,        /* bler_d */
     11,        /* bler_d */
-    34,        /* blei_d */
+    30,        /* blei_d */
     13,        /* beqr_d */
     36,        /* beqi_d */
     11,        /* bger_d */
     13,        /* beqr_d */
     36,        /* beqi_d */
     11,        /* bger_d */
-    34,        /* bgei_d */
+    30,        /* bgei_d */
     11,        /* bgtr_d */
     11,        /* bgtr_d */
-    34,        /* bgti_d */
+    30,        /* bgti_d */
     14,        /* bner_d */
     14,        /* bner_d */
-    37,        /* bnei_d */
+    33,        /* bnei_d */
     11,        /* bunltr_d */
     11,        /* bunltr_d */
-    34,        /* bunlti_d */
+    30,        /* bunlti_d */
     11,        /* bunler_d */
     11,        /* bunler_d */
-    34,        /* bunlei_d */
+    30,        /* bunlei_d */
     11,        /* buneqr_d */
     11,        /* buneqr_d */
-    34,        /* buneqi_d */
+    30,        /* buneqi_d */
     11,        /* bunger_d */
     11,        /* bunger_d */
-    34,        /* bungei_d */
+    30,        /* bungei_d */
     11,        /* bungtr_d */
     11,        /* bungtr_d */
-    34,        /* bungti_d */
+    30,        /* bungti_d */
     11,        /* bltgtr_d */
     11,        /* bltgtr_d */
-    34,        /* bltgti_d */
+    30,        /* bltgti_d */
     11,        /* bordr_d */
     11,        /* bordr_d */
-    34,        /* bordi_d */
+    30,        /* bordi_d */
     11,        /* bunordr_d */
     11,        /* bunordr_d */
-    34,        /* bunordi_d */
+    30,        /* bunordi_d */
     0, /* pushargr_d */
     0, /* pushargi_d */
     0, /* retr_d */
     0, /* pushargr_d */
     0, /* pushargi_d */
     0, /* retr_d */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    9, /* bswapr_us */
-    6, /* bswapr_ui */
-    0, /* bswapr_ul */
-    0, /* casr */
-    0, /* casi */
+    11,        /* clo */
+    5, /* clz */
+    11,        /* cto */
+    5, /* ctz */
+#else
 
 
-#  else
-#define JIT_INSTR_MAX 115
+#define JIT_INSTR_MAX 112
     0, /* data */
     0, /* live */
     0, /* data */
     0, /* live */
-    6, /* align */
+    27,        /* align */
     0, /* save */
     0, /* load */
     0, /* save */
     0, /* load */
+    4, /* skip */
     0, /* #name */
     0, /* #note */
     7, /* label */
     0, /* #name */
     0, /* #note */
     7, /* label */
-    115,       /* prolog */
+    112,       /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* allocar */
-    0, /* arg */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_c */
     0, /* getarg_uc */
     0, /* getarg_s */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
     0, /* getarg_i */
     0, /* getarg_ui */
     0, /* getarg_l */
-    0, /* putargr */
-    0, /* putargi */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
     38,        /* va_start */
     41,        /* va_arg */
     48,        /* va_arg_d */
     38,        /* va_start */
     41,        /* va_arg */
     48,        /* va_arg_d */
     10,        /* movi */
     7, /* movnr */
     7, /* movzr */
     10,        /* movi */
     7, /* movnr */
     7, /* movzr */
+    11,        /* casr */
+    16,        /* casi */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     3, /* extr_i */
     3, /* extr_ui */
     4, /* extr_c */
     4, /* extr_uc */
     4, /* extr_s */
     4, /* extr_us */
     3, /* extr_i */
     3, /* extr_ui */
+    9, /* bswapr_us */
+    6, /* bswapr_ui */
+    6, /* bswapr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     6, /* htonr_ul */
     9, /* htonr_us */
     6, /* htonr_ui */
     6, /* htonr_ul */
     9, /* bxsubr_u */
     10,        /* bxsubi_u */
     2, /* jmpr */
     9, /* bxsubr_u */
     10,        /* bxsubi_u */
     2, /* jmpr */
-    13,        /* jmpi */
-    3, /* callr */
-    12,        /* calli */
+    5, /* jmpi */
+    2, /* callr */
+    13,        /* calli */
     0, /* prepare */
     0, /* prepare */
-    0, /* pushargr */
-    0, /* pushargi */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
     0, /* finishr */
     0, /* finishi */
     0, /* ret */
-    0, /* retr */
-    0, /* reti */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     0, /* retval_c */
     0, /* retval_uc */
     0, /* retval_s */
     10,        /* bltr_f */
     20,        /* blti_f */
     10,        /* bler_f */
     10,        /* bltr_f */
     20,        /* blti_f */
     10,        /* bler_f */
-    25,        /* blei_f */
+    22,        /* blei_f */
     12,        /* beqr_f */
     12,        /* beqr_f */
-    27,        /* beqi_f */
+    22,        /* beqi_f */
     10,        /* bger_f */
     10,        /* bger_f */
-    25,        /* bgei_f */
+    22,        /* bgei_f */
     10,        /* bgtr_f */
     10,        /* bgtr_f */
-    25,        /* bgti_f */
+    22,        /* bgti_f */
     13,        /* bner_f */
     13,        /* bner_f */
-    28,        /* bnei_f */
+    25,        /* bnei_f */
     10,        /* bunltr_f */
     10,        /* bunltr_f */
-    25,        /* bunlti_f */
+    23,        /* bunlti_f */
     10,        /* bunler_f */
     10,        /* bunler_f */
-    25,        /* bunlei_f */
+    23,        /* bunlei_f */
     10,        /* buneqr_f */
     10,        /* buneqr_f */
-    25,        /* buneqi_f */
+    23,        /* buneqi_f */
     10,        /* bunger_f */
     10,        /* bunger_f */
-    25,        /* bungei_f */
+    23,        /* bungei_f */
     10,        /* bungtr_f */
     10,        /* bungtr_f */
-    25,        /* bungti_f */
+    22,        /* bungti_f */
     10,        /* bltgtr_f */
     10,        /* bltgtr_f */
-    25,        /* bltgti_f */
+    22,        /* bltgti_f */
     10,        /* bordr_f */
     10,        /* bordr_f */
-    25,        /* bordi_f */
+    22,        /* bordi_f */
     10,        /* bunordr_f */
     10,        /* bunordr_f */
-    25,        /* bunordi_f */
+    22,        /* bunordi_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* pushargr_f */
     0, /* pushargi_f */
     0, /* retr_f */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
     0, /* movi_d_ww */
     0, /* movr_d_w */
     0, /* movi_d_w */
-    9, /* bswapr_us */
-    6, /* bswapr_ui */
-    6, /* bswapr_ul */
-    11,        /* casr */
-    16,        /* casi */
+    11,        /* clo */
+    5, /* clz */
+    11,        /* cto */
+    5, /* ctz */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
index 227b1a2..3de0214 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -408,14 +408,14 @@ _x87_b##name##i_##type(jit_state_t *_jit,                         \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
 {                                                                      \
                       jit_word_t i0, jit_int32_t r0,                   \
                       jit_float##size##_t *i1)                         \
 {                                                                      \
-    jit_word_t         word;                                           \
+    jit_word_t         w;                                              \
     jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
                                          jit_class_nospill);           \
     assert(jit_x87_reg_p(reg));                                                \
     x87_movi_##type(rn(reg), i1);                                      \
     jit_int32_t                reg = jit_get_reg(jit_class_fpr|                \
                                          jit_class_nospill);           \
     assert(jit_x87_reg_p(reg));                                                \
     x87_movi_##type(rn(reg), i1);                                      \
-    word = x87_b##name##r_##type(i0, r0, rn(reg));                     \
+    w = x87_b##name##r_##type(i0, r0, rn(reg));                                \
     jit_unget_reg(reg);                                                        \
     jit_unget_reg(reg);                                                        \
-    return (word);                                                     \
+    return (w);                                                                \
 }
 #  define fopi(name)                   fpr_opi(name, f, 32)
 #  define fbopi(name)                  fpr_bopi(name, f, 32)
 }
 #  define fopi(name)                   fpr_opi(name, f, 32)
 #  define fbopi(name)                  fpr_bopi(name, f, 32)
@@ -662,6 +662,7 @@ _x87_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
 #if defined(sun)
     /* for the sake of passing test cases in x87 mode, otherwise only sse
      * is supported */
 #if defined(sun)
     /* for the sake of passing test cases in x87 mode, otherwise only sse
      * is supported */
@@ -692,6 +693,7 @@ _x87_truncr_d_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     fldr(r1);
     fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
     ldxi(r0, _RBP_REGNO, CVT_OFFSET);
     fldr(r1);
     fisttpqm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
     ldxi(r0, _RBP_REGNO, CVT_OFFSET);
@@ -701,6 +703,7 @@ _x87_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _x87_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     stxi(CVT_OFFSET, _RBP_REGNO, r1);
 #  if __X32
     fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
     stxi(CVT_OFFSET, _RBP_REGNO, r1);
 #  if __X32
     fildlm(CVT_OFFSET, _RBP_REGNO, _NOREG, _SCL1);
@@ -771,8 +774,7 @@ _x87jcc(jit_state_t *_jit, jit_int32_t code,
        fldr(r0);
        fucomipr(r1 + 1);
     }
        fldr(r0);
        fucomipr(r1 + 1);
     }
-    jcc(code, i0);
-    return (_jit->pc.w);
+    return (jcc(code, i0));
 }
 
 static jit_word_t
 }
 
 static jit_word_t
@@ -788,8 +790,7 @@ _x87jcc2(jit_state_t *_jit, jit_int32_t code,
        fldr(f0);
        fucomipr(f1 + 1);
     }
        fldr(f0);
        fucomipr(f1 + 1);
     }
-    jcc(code, i0);
-    return (_jit->pc.w);
+    return (jcc(code, i0));
 }
 
 fopi(lt)
 }
 
 fopi(lt)
@@ -847,6 +848,7 @@ _x87_movi_f(jit_state_t *_jit, jit_int32_t r0, jit_float32_t *i0)
        fldln2();
     else {
        if (_jitc->no_data) {
        fldln2();
     else {
        if (_jitc->no_data) {
+           CHECK_CVT_OFFSET();
            reg = jit_get_reg(jit_class_gpr);
            movi(rn(reg), data.i);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
            reg = jit_get_reg(jit_class_gpr);
            movi(rn(reg), data.i);
            stxi_i(CVT_OFFSET, _RBP_REGNO, rn(reg));
@@ -1038,6 +1040,7 @@ _x87_movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
        fldln2();
     else {
        if (_jitc->no_data) {
        fldln2();
     else {
        if (_jitc->no_data) {
+           CHECK_CVT_OFFSET();
            reg = jit_get_reg(jit_class_gpr);
 #if __X32 || __X64_32
            movi(rn(reg), data.ii[0]);
            reg = jit_get_reg(jit_class_gpr);
 #if __X32 || __X64_32
            movi(rn(reg), data.ii[0]);
@@ -1082,10 +1085,9 @@ _x87_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        fldr(f1);
        fucomipr(f2 + 1);
     }
        fldr(f1);
        fucomipr(f2 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_E, reg);
     cc(X86_CC_E, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1115,10 +1117,9 @@ _x87_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        fldr(f1);
        fucomipr(f2 + 1);
     }
        fldr(f1);
        fucomipr(f2 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
+    jp_code = jpes(0);
     cc(X86_CC_NE, reg);
     cc(X86_CC_NE, reg);
-    patch_rel_char(jp_code, _jit->pc.w);
+    patch_at(jp_code, _jit->pc.w);
     if (!rc)
        xchgr(r0, reg);
 }
     if (!rc)
        xchgr(r0, reg);
 }
@@ -1283,6 +1284,7 @@ dbopi(le)
 static jit_word_t
 _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 static jit_word_t
 _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t                 w;
     jit_int32_t                        f0, f1;
     jit_word_t                 jp_code;
     if (r1 == _ST0_REGNO)      f0 = r1, f1 = r0;
     jit_int32_t                        f0, f1;
     jit_word_t                 jp_code;
     if (r1 == _ST0_REGNO)      f0 = r1, f1 = r0;
@@ -1293,11 +1295,10 @@ _x87_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        fldr(f0);
        fucomipr(f1 + 1);
     }
        fldr(f0);
        fucomipr(f1 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
-    jcc(X86_CC_E, i0);
-    patch_rel_char(jp_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jpes(0);
+    w = jcc(X86_CC_E, i0);
+    patch_at(jp_code, _jit->pc.w);
+    return (w);
 }
 dbopi(eq)
 dbopi(ge)
 }
 dbopi(eq)
 dbopi(ge)
@@ -1306,6 +1307,7 @@ dbopi(gt)
 static jit_word_t
 _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
 static jit_word_t
 _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
+    jit_word_t                 w;
     jit_int32_t                        f0, f1;
     jit_word_t                 jp_code;
     jit_word_t                 jz_code;
     jit_int32_t                        f0, f1;
     jit_word_t                 jp_code;
     jit_word_t                 jz_code;
@@ -1317,14 +1319,12 @@ _x87_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        fldr(f0);
        fucomipr(f1 + 1);
     }
        fldr(f0);
        fucomipr(f1 + 1);
     }
-    jpes(0);
-    jp_code = _jit->pc.w;
-    jzs(0);
-    jz_code = _jit->pc.w;
-    patch_rel_char(jp_code, _jit->pc.w);
-    jmpi(i0);
-    patch_rel_char(jz_code, _jit->pc.w);
-    return (_jit->pc.w);
+    jp_code = jpes(0);
+    jz_code = jzs(0);
+    patch_at(jp_code, _jit->pc.w);
+    w = jmpi(i0);
+    patch_at(jz_code, _jit->pc.w);
+    return (w);
 }
 dbopi(ne)
 dbopi(unlt)
 }
 dbopi(ne)
 dbopi(unlt)
index 6472e56..b409457 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
 #include <lightning/jit_private.h>
 
 #if __X32
 #include <lightning/jit_private.h>
 
 #if __X32
+#  define CAN_RIP_ADDRESS              0
+#  define address_p(i0)                        1
 #  define jit_arg_reg_p(i)             0
 #  define jit_arg_f_reg_p(i)           0
 #  define jit_arg_reg_p(i)             0
 #  define jit_arg_f_reg_p(i)           0
-#  define stack_framesize              20
-#  define stack_adjust                 12
-#  define CVT_OFFSET                   -12
+/* callee save                        + 16 byte align
+ * align16(%ebp + %rbx + %rsi + %rdi) + (16 - 4)  */
+#  define stack_framesize              28
 #  define REAL_WORDSIZE                        4
 #  define va_gp_increment              4
 #  define va_fp_increment              8
 #else
 #  define REAL_WORDSIZE                        4
 #  define va_gp_increment              4
 #  define va_fp_increment              8
 #else
+#  if _WIN32 || __X64_32
+#    define CAN_RIP_ADDRESS            0
+#  else
+#    define CAN_RIP_ADDRESS            1
+#  endif
+#  if __X64_32
+#    define address_p(i0)              ((jit_word_t)(i0) >= 0)
+#  else
+#    define address_p(i0)              can_sign_extend_int_p(i0)
+#  endif
 #  if __CYGWIN__ || _WIN32
 #    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 4)
 #    define jit_arg_f_reg_p(i)         jit_arg_reg_p(i)
 #  if __CYGWIN__ || _WIN32
 #    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 4)
 #    define jit_arg_f_reg_p(i)         jit_arg_reg_p(i)
+/* callee save                                                + 16 byte align
+ * align16(%rbp+%rbx+%rdi+%rsi+%r1[2-5]+%xmm[6-9]+%xmm1[0-5]) + (16 - 8) */
 #    define stack_framesize            152
 #    define va_fp_increment            8
 #  else
 #    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 6)
 #    define jit_arg_f_reg_p(i)         ((i) >= 0 && (i) < 8)
 #    define stack_framesize            152
 #    define va_fp_increment            8
 #  else
 #    define jit_arg_reg_p(i)           ((i) >= 0 && (i) < 6)
 #    define jit_arg_f_reg_p(i)         ((i) >= 0 && (i) < 8)
+/* callee save                                      + 16 byte align
+ * align16(%rbp + %r15 + %r14 + %r13 + %r12 + %rbx) + (16 - 8) */
 #    define stack_framesize            56
 #    define first_gp_argument          rdi
 #    define first_gp_offset            offsetof(jit_va_list_t, rdi)
 #    define stack_framesize            56
 #    define first_gp_argument          rdi
 #    define first_gp_offset            offsetof(jit_va_list_t, rdi)
 #    define first_fp_from_offset(fp)   (((fp) - va_gp_max_offset) / 16)
 #  endif
 #  define va_gp_increment              8
 #    define first_fp_from_offset(fp)   (((fp) - va_gp_max_offset) / 16)
 #  endif
 #  define va_gp_increment              8
-#  define stack_adjust                 8
-#  define CVT_OFFSET                   -8
 #  define REAL_WORDSIZE                        8
 #endif
 #  define REAL_WORDSIZE                        8
 #endif
+#define CVT_OFFSET                     _jitc->function->cvt_offset
+
+#define CHECK_CVT_OFFSET()                                             \
+    do {                                                               \
+       if (!_jitc->function->cvt_offset) {                             \
+           _jitc->again = 1;                                           \
+           _jitc->function->cvt_offset =                               \
+                jit_allocai(sizeof(jit_float64_t));                    \
+       }                                                               \
+    } while (0)
 
 /*
  * Types
 
 /*
  * Types
@@ -99,6 +123,8 @@ typedef struct jit_va_list {
 /*
  * Prototypes
  */
 /*
  * Prototypes
  */
+#define compute_framesize()            _compute_framesize(_jit)
+static void _compute_framesize(jit_state_t*);
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define sse_from_x87_f(r0, r1)         _sse_from_x87_f(_jit, r0, r1)
 #define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 #define sse_from_x87_f(r0, r1)         _sse_from_x87_f(_jit, r0, r1)
@@ -227,6 +253,22 @@ jit_register_t             _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
     { _NOREG,                          "<none>" },
 };
 
+static jit_int32_t iregs[] = {
+#if __X32
+    _RBX, _RSI, _RDI,
+#elif (__CYGWIN__ || _WIN32)
+    _RBX, _RDI, _RSI, _R12, _R13, _R14, _R15,
+#else
+    _R15, _R14, _R13, _R12, _RBX,
+#endif
+};
+
+#if __X64 && (__CYGWIN__ || _WIN32)
+static jit_int32_t fregs[] = {
+    _XMM6, _XMM7, _XMM8, _XMM9, _XMM10, _XMM11, _XMM12, _XMM13, _XMM14, _XMM15,
+};
+#endif
+
 /*
  * Implementation
  */
 /*
  * Implementation
  */
@@ -234,6 +276,45 @@ void
 jit_get_cpu(void)
 {
     union {
 jit_get_cpu(void)
 {
     union {
+       /* eax=7 and ecx=0 */
+       struct {
+           jit_uword_t fsgsbase        : 1;
+           jit_uword_t IA32_TSC_ADJUST : 1;
+           jit_uword_t sgx             : 1;
+           jit_uword_t bmi1            : 1;
+           jit_uword_t hle             : 1;
+           jit_uword_t avx2            : 1;
+           jit_uword_t FDP_EXCPTN_ONLY : 1;
+           jit_uword_t smep            : 1;
+           jit_uword_t bmi2            : 1;
+           jit_uword_t erms            : 1;
+           jit_uword_t invpcid         : 1;
+           jit_uword_t rtm             : 1;
+           jit_uword_t rdt_m_pqm       : 1;
+           jit_uword_t dep_FPU_CS_DS   : 1;
+           jit_uword_t mpx             : 1;
+           jit_uword_t rdt_a_pqe       : 1;
+           jit_uword_t avx512_f        : 1;
+           jit_uword_t avx512_dq       : 1;
+           jit_uword_t rdseed          : 1;
+           jit_uword_t adx             : 1;
+           jit_uword_t smap            : 1;
+           jit_uword_t avx512_ifma     : 1;
+           jit_uword_t __reserved0     : 1;
+           jit_uword_t clflushopt      : 1;
+           jit_uword_t clwb            : 1;
+           jit_uword_t pt              : 1;
+           jit_uword_t avx512_pf       : 1;
+           jit_uword_t avx512_er       : 1;
+           jit_uword_t avx512_cd       : 1;
+           jit_uword_t sha             : 1;
+           jit_uword_t avx512_bw       : 1;
+           jit_uword_t avx512_vl       : 1;
+       } bits;
+       jit_uword_t     cpuid;
+    } ebx;
+    union {
+       /* eax=0 */
        struct {
            jit_uint32_t sse3           : 1;
            jit_uint32_t pclmulqdq      : 1;
        struct {
            jit_uint32_t sse3           : 1;
            jit_uint32_t pclmulqdq      : 1;
@@ -271,6 +352,7 @@ jit_get_cpu(void)
        jit_uword_t     cpuid;
     } ecx;
     union {
        jit_uword_t     cpuid;
     } ecx;
     union {
+       /* eax=0 */
        struct {
            jit_uint32_t fpu            : 1;
            jit_uint32_t vme            : 1;
        struct {
            jit_uint32_t fpu            : 1;
            jit_uint32_t vme            : 1;
@@ -310,7 +392,7 @@ jit_get_cpu(void)
 #if __X32
     int                        ac, flags;
 #endif
 #if __X32
     int                        ac, flags;
 #endif
-    jit_uword_t                eax, ebx;
+    jit_uword_t                eax;
 
 #if __X32
     /* adapted from glibc __sysconf */
 
 #if __X32
     /* adapted from glibc __sysconf */
@@ -339,7 +421,7 @@ jit_get_cpu(void)
 #else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #endif
 #else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #endif
-                     : "=a" (eax), "=r" (ebx),
+                     : "=a" (eax), "=r" (ebx.cpuid),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (1));
 
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (1));
 
@@ -361,6 +443,15 @@ jit_get_cpu(void)
     jit_cpu.aes                = ecx.bits.aes;
     jit_cpu.avx                = ecx.bits.avx;
 
     jit_cpu.aes                = ecx.bits.aes;
     jit_cpu.avx                = ecx.bits.avx;
 
+    /* query %eax = 7 and ecx = 0 function */
+#if __X64
+    __asm__ volatile ("cpuid"
+                     : "=a" (eax), "=b" (ebx.cpuid), "=c" (ecx), "=d" (edx)
+                     : "a" (7), "c" (0));
+#endif
+    jit_cpu.adx = ebx.bits.adx;
+
+
     /* query %eax = 0x80000001 function */
 #if __X64
 #  if __X64_32
     /* query %eax = 0x80000001 function */
 #if __X64
 #  if __X64_32
@@ -368,10 +459,11 @@ jit_get_cpu(void)
 #  else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #  endif
 #  else
     __asm__ volatile ("xchgq %%rbx, %1; cpuid; xchgq %%rbx, %1"
 #  endif
-                     : "=a" (eax), "=r" (ebx),
+                     : "=a" (eax), "=r" (ebx.cpuid),
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (0x80000001));
                      "=c" (ecx.cpuid), "=d" (edx.cpuid)
                      : "0" (0x80000001));
-    jit_cpu.lahf       = ecx.cpuid & 1;
+    jit_cpu.lahf       = !!(ecx.cpuid & 1);
+    jit_cpu.abm                = !!(ecx.cpuid & 32);
 #endif
 }
 
 #endif
 }
 
@@ -414,11 +506,15 @@ _jit_prolog(jit_state_t *_jit)
        _jitc->functions.length += 16;
     }
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
        _jitc->functions.length += 16;
     }
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
-    _jitc->function->self.size = stack_framesize;
+    /* One extra stack slot for implicit saved returned address */
+    _jitc->function->self.size = stack_framesize + REAL_WORDSIZE;
     _jitc->function->self.argi = _jitc->function->self.argf =
        _jitc->function->self.aoff = _jitc->function->self.alen = 0;
     _jitc->function->self.argi = _jitc->function->self.argf =
        _jitc->function->self.aoff = _jitc->function->self.alen = 0;
-    /* sse/x87 conversion */
-    _jitc->function->self.aoff = CVT_OFFSET;
+    _jitc->function->cvt_offset = 0;
+#if __X64 && (__CYGWIN__ || _WIN32)
+    /* force framepointer */
+    jit_check_frame();
+#endif
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
@@ -444,6 +540,13 @@ jit_int32_t
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
 _jit_allocai(jit_state_t *_jit, jit_int32_t length)
 {
     assert(_jitc->function);
+    jit_check_frame();
+#if __X32
+    /* Stack is 4 bytes aligned but jit functions keep it 8 bytes aligned.
+     * Called functions have 16 byte aligned stack. */
+    if (!_jitc->function->self.aoff)
+       _jitc->function->self.aoff = -4;
+#endif
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
     switch (length) {
        case 0: case 1:                                         break;
        case 2:         _jitc->function->self.aoff &= -2;       break;
@@ -500,22 +603,18 @@ _jit_ret(jit_state_t *_jit)
 }
 
 void
 }
 
 void
-_jit_retr(jit_state_t *_jit, jit_int32_t u)
-{
-    jit_inc_synth_w(retr, u);
-    /* movr(%ret, %ret) would be optimized out */
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    /* explicitly tell it is live */
-    jit_live(JIT_RET);
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
+{
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
 }
 
 void
     jit_ret();
     jit_dec_synth();
 }
 
 void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
 {
-    jit_inc_synth_w(reti, u);
+    jit_code_inc_synth_w(code, u);
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
     jit_movi(JIT_RET, u);
     jit_ret();
     jit_dec_synth();
@@ -575,7 +674,7 @@ _jit_epilog(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
 jit_bool_t
 _jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
 {
-    if (u->code == jit_code_arg)
+    if (u->code >= jit_code_arg_c && u->code <= jit_code_arg)
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
        return (jit_arg_reg_p(u->u.w));
     assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
     return (jit_arg_f_reg_p(u->u.w));
@@ -585,6 +684,7 @@ void
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
 _jit_ellipsis(jit_state_t *_jit)
 {
     jit_inc_synth(ellipsis);
+    jit_check_frame();
     if (_jitc->prepare) {
        jit_link_prepare();
        /* Remember that a varargs function call is being constructed. */
     if (_jitc->prepare) {
        jit_link_prepare();
        /* Remember that a varargs function call is being constructed. */
@@ -629,12 +729,15 @@ _jit_va_push(jit_state_t *_jit, jit_int32_t u)
 }
 
 jit_node_t *
 }
 
 jit_node_t *
-_jit_arg(jit_state_t *_jit)
+_jit_arg(jit_state_t *_jit, jit_code_t code)
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
 {
     jit_node_t         *node;
     jit_int32_t                 offset;
     assert(_jitc->function);
     assert(!(_jitc->function->self.call & jit_call_varargs));
+#if STRONG_TYPE_CHECKING
+    assert(code >= jit_code_arg_c && code <= jit_code_arg);
+#endif
 #if __X64
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
 #if __X64
     if (jit_arg_reg_p(_jitc->function->self.argi)) {
        offset = _jitc->function->self.argi++;
@@ -647,8 +750,9 @@ _jit_arg(jit_state_t *_jit)
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += REAL_WORDSIZE;
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     }
-    node = jit_new_node_ww(jit_code_arg, offset,
+    node = jit_new_node_ww(code, offset,
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
                           ++_jitc->function->self.argn);
     jit_link_prolog();
     return (node);
@@ -676,6 +780,7 @@ _jit_arg_f(jit_state_t *_jit)
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += REAL_WORDSIZE;
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_f, offset,
                           ++_jitc->function->self.argn);
@@ -705,6 +810,7 @@ _jit_arg_d(jit_state_t *_jit)
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_float64_t);
     {
        offset = _jitc->function->self.size;
        _jitc->function->self.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
     }
     node = jit_new_node_ww(jit_code_arg_d, offset,
                           ++_jitc->function->self.argn);
@@ -715,63 +821,75 @@ _jit_arg_d(jit_state_t *_jit)
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_c, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
 #endif
     jit_inc_synth_wp(getarg_c, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_c(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_c(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_c(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_c);
     jit_inc_synth_wp(getarg_uc, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
 #endif
     jit_inc_synth_wp(getarg_uc, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_uc(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_uc(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_uc(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_s, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
 #endif
     jit_inc_synth_wp(getarg_s, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_s(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_s(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_s(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_s);
     jit_inc_synth_wp(getarg_us, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
 #endif
     jit_inc_synth_wp(getarg_us, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_extr_us(u, JIT_RA0 - v->u.w);
     else
 #endif
-       jit_ldxi_us(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_us(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_i, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w)) {
     jit_inc_synth_wp(getarg_i, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w)) {
@@ -783,7 +901,10 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
      }
     else
 #endif
      }
     else
 #endif
-       jit_ldxi_i(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_i(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -791,57 +912,66 @@ _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_i);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_ui, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, _RBP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_ui(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
     jit_dec_synth();
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    assert(v->code == jit_code_arg);
+    assert_arg_type(v->code, jit_code_arg_l);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
     jit_inc_synth_wp(getarg_l, u, v);
     if (jit_arg_reg_p(v->u.w))
        jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, _RBP, v->u.w);
+    else {
+       jit_node_t      *node = jit_ldxi_l(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 #endif
 
 void
     jit_dec_synth();
 }
 #endif
 
 void
-_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
 {
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
 #endif
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_movr(JIT_RA0 - v->u.w, u);
     else
 #endif
-       jit_stxi(v->u.w, _RBP, u);
+    {
+       jit_node_t      *node = jit_stxi(v->u.w, _RBP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
 void
     jit_dec_synth();
 }
 
 void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
 {
     jit_int32_t                regno;
 {
     jit_int32_t                regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else
 #endif
     {
 #if __X64
     if (jit_arg_reg_p(v->u.w))
        jit_movi(JIT_RA0 - v->u.w, u);
     else
 #endif
     {
+       jit_node_t      *node;
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
-       jit_stxi(v->u.w, _RBP, regno);
+       node = jit_stxi(v->u.w, _RBP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -857,7 +987,10 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, _XMM0 - v->u.w);
     else
 #endif
        jit_movr_f(u, _XMM0 - v->u.w);
     else
 #endif
-       jit_ldxi_f(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_f(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -867,11 +1000,14 @@ _jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
 #if __X64
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_wp(putargr_f, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movr_f(_XMM0 - v->u.w, u);
     else
 #endif
        jit_movr_f(_XMM0 - v->u.w, u);
     else
 #endif
-       jit_stxi_f(v->u.w, _RBP, u);
+    {
+       jit_node_t      *node = jit_stxi_f(v->u.w, _RBP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -882,14 +1018,16 @@ _jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
 #if __X64
     assert(v->code == jit_code_arg_f);
     jit_inc_synth_fp(putargi_f, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movi_f(_XMM0 - v->u.w, u);
     else
 #endif
     {
        jit_movi_f(_XMM0 - v->u.w, u);
     else
 #endif
     {
-       regno = jit_get_reg(jit_class_gpr);
+       jit_node_t      *node;
+       regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
        jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, _RBP, regno);
+       node = jit_stxi_f(v->u.w, _RBP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
        jit_unget_reg(regno);
     }
     jit_dec_synth();
@@ -905,7 +1043,10 @@ _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_d(u, _XMM0 - v->u.w);
     else
 #endif
        jit_movr_d(u, _XMM0 - v->u.w);
     else
 #endif
-       jit_ldxi_d(u, _RBP, v->u.w);
+    {
+       jit_node_t      *node = jit_ldxi_d(u, _RBP, v->u.w);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -915,11 +1056,14 @@ _jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
 #if __X64
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_wp(putargr_d, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movr_d(_XMM0 - v->u.w, u);
     else
 #endif
        jit_movr_d(_XMM0 - v->u.w, u);
     else
 #endif
-       jit_stxi_d(v->u.w, _RBP, u);
+    {
+       jit_node_t      *node = jit_stxi_d(v->u.w, _RBP, u);
+       jit_link_alist(node);
+    }
     jit_dec_synth();
 }
 
     jit_dec_synth();
 }
 
@@ -930,24 +1074,26 @@ _jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
 #if __X64
     assert(v->code == jit_code_arg_d);
     jit_inc_synth_dp(putargi_d, u, v);
 #if __X64
-    if (jit_arg_reg_p(v->u.w))
+    if (jit_arg_f_reg_p(v->u.w))
        jit_movi_d(_XMM0 - v->u.w, u);
     else
 #endif
     {
        jit_movi_d(_XMM0 - v->u.w, u);
     else
 #endif
     {
-       regno = jit_get_reg(jit_class_gpr);
+       jit_node_t      *node;
+       regno = jit_get_reg(jit_class_fpr);
        jit_movi_d(regno, u);
        jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, _RBP, regno);
+       node = jit_stxi_d(v->u.w, _RBP, regno);
+       jit_link_alist(node);
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
        jit_unget_reg(regno);
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargr(jit_state_t *_jit, jit_int32_t u)
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
 {
     assert(_jitc->function);
 {
     assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     jit_link_prepare();
 #if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -964,16 +1110,17 @@ _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
     {
        jit_stxi(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += REAL_WORDSIZE;
     {
        jit_stxi(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     jit_dec_synth();
 }
 
 void
     }
     jit_dec_synth();
 }
 
 void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
 {
     jit_int32_t                 regno;
     assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
+    jit_code_inc_synth_w(code, u);
     jit_link_prepare();
 #if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
     jit_link_prepare();
 #if __X64
     if (jit_arg_reg_p(_jitc->function->call.argi)) {
@@ -994,6 +1141,7 @@ _jit_pushargi(jit_state_t *_jit, jit_word_t u)
        jit_stxi(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
        jit_stxi(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -1028,6 +1176,7 @@ _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
     {
        jit_stxi_f(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += REAL_WORDSIZE;
     {
        jit_stxi_f(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += REAL_WORDSIZE;
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -1066,6 +1215,7 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
        jit_stxi_f(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
        jit_stxi_f(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += REAL_WORDSIZE;
        jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -1100,6 +1250,7 @@ _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
     {
        jit_stxi_d(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += sizeof(jit_float64_t);
     {
        jit_stxi_d(_jitc->function->call.size, _RSP, u);
        _jitc->function->call.size += sizeof(jit_float64_t);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -1138,6 +1289,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_stxi_d(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += sizeof(jit_float64_t);
        jit_unget_reg(regno);
        jit_stxi_d(_jitc->function->call.size, _RSP, regno);
        _jitc->function->call.size += sizeof(jit_float64_t);
        jit_unget_reg(regno);
+       jit_check_frame();
     }
     jit_dec_synth();
 }
     }
     jit_dec_synth();
 }
@@ -1171,6 +1323,7 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
     jit_int32_t                 reg;
     jit_node_t         *call;
     assert(_jitc->function);
     jit_int32_t                 reg;
     jit_node_t         *call;
     assert(_jitc->function);
+    jit_check_frame();
     reg = r0;
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
     reg = r0;
     jit_inc_synth_w(finishr, r0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
@@ -1203,32 +1356,26 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
 jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
-#if __X64
-    jit_int32_t                reg;
-#endif
     jit_node_t         *node;
     assert(_jitc->function);
     jit_node_t         *node;
     assert(_jitc->function);
+    jit_check_frame();
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
 #if __X64
     jit_inc_synth_w(finishi, (jit_word_t)i0);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
 #if __X64
-    /* FIXME preventing %rax allocation is good enough, but for consistency
-     * it should automatically detect %rax is dead, in case it has run out
-     * registers, and not save/restore it, what would be wrong if using the
-     * the return value, otherwise, just a needless noop */
-    /* >> prevent %rax from being allocated as the function pointer */
-    jit_regset_setbit(&_jitc->regarg, _RAX);
-    reg = jit_get_reg(jit_class_gpr);
-    node = jit_movi(reg, (jit_word_t)i0);
-    jit_finishr(reg);
-    jit_unget_reg(reg);
-    /* << prevent %rax from being allocated as the function pointer */
-    jit_regset_clrbit(&_jitc->regarg, _RAX);
-#else
+#  if !(__CYGWIN__ || _WIN32)
+    if (_jitc->function->call.call & jit_call_varargs) {
+       if (_jitc->function->call.argf)
+           jit_movi(_RAX, _jitc->function->call.argf);
+       else
+           jit_movi(_RAX, 0);
+       jit_live(_RAX);
+    }
+#  endif
+#endif
     node = jit_calli(i0);
     node->v.w = _jitc->function->call.argi;
     node->w.w = _jitc->function->call.argf;
     node = jit_calli(i0);
     node->v.w = _jitc->function->call.argi;
     node->w.w = _jitc->function->call.argf;
-#endif
     _jitc->function->call.argi = _jitc->function->call.argf =
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
     _jitc->function->call.argi = _jitc->function->call.argf =
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
@@ -1333,6 +1480,7 @@ _emit_code(jit_state_t *_jit)
     struct {
        jit_node_t      *node;
        jit_word_t       word;
     struct {
        jit_node_t      *node;
        jit_word_t       word;
+       jit_function_t   func;
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
 #if DEVEL_DISASSEMBLER
        jit_word_t       prevw;
 #endif
@@ -1598,7 +1746,10 @@ _emit_code(jit_state_t *_jit)
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
                if ((word = _jit->pc.w & (node->u.w - 1)))
                    nop(node->u.w - word);
                break;
-           case jit_code_note:         case jit_code_name:
+            case jit_code_skip:
+                nop(node->u.w);
+                break;
+            case jit_code_note:                case jit_code_name:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
@@ -1654,6 +1805,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
                case_rrw(rsh, _u);
                case_rr(neg,);
                case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
                case_rrr(lt,);
                case_rrw(lt,);
                case_rrr(lt, _u);
@@ -1695,7 +1850,14 @@ _emit_code(jit_state_t *_jit)
                    else {
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
                    else {
                        assert(temp->code == jit_code_label ||
                               temp->code == jit_code_epilog);
-                       word = movi_p(rn(node->u.w), node->v.w);
+#if CAN_RIP_ADDRESS
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if ((jit_int32_t)word == word)
+                           word = movi(rn(node->u.w), _jit->pc.w);
+                       else
+#endif
+                           word = movi_p(rn(node->u.w), node->v.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
@@ -2017,6 +2179,7 @@ _emit_code(jit_state_t *_jit)
                case_bff(unord, _d);
                case_bfw(unord, _d, 64);
            case jit_code_jmpr:
                case_bff(unord, _d);
                case_bfw(unord, _d, 64);
            case jit_code_jmpr:
+               jit_check_frame();
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
@@ -2027,14 +2190,24 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        jmpi(temp->u.w);
                    else {
-                       word = jmpi_p(_jit->pc.w);
+#if __X64
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if ((jit_int32_t)word == word)
+                           word = jmpi(_jit->pc.w);
+                       else
+#endif
+                           word = jmpi_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    jmpi(node->u.w);
                    jmpi(node->u.w);
+               }
                break;
            case jit_code_callr:
                break;
            case jit_code_callr:
+               jit_check_frame();
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
                callr(rn(node->u.w));
                break;
            case jit_code_calli:
@@ -2045,22 +2218,34 @@ _emit_code(jit_state_t *_jit)
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
                    if (temp->flag & jit_flag_patch)
                        calli(temp->u.w);
                    else {
-                       word = calli_p(_jit->pc.w);
+#if __X64
+                       word = _jit->code.length -
+                           (_jit->pc.uc - _jit->code.ptr);
+                       if ((jit_int32_t)word == word)
+                           word = calli(_jit->pc.w);
+                       else
+#endif
+                           word = calli_p(_jit->pc.w);
                        patch(word, node);
                    }
                }
                        patch(word, node);
                    }
                }
-               else
+               else {
+                   jit_check_frame();
                    calli(node->u.w);
                    calli(node->u.w);
+               }
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                break;
            case jit_code_prolog:
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
+               memcpy(&undo.func, _jitc->function, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
 #if DEVEL_DISASSEMBLER
                undo.prevw = prevw;
 #endif
                undo.patch_offset = _jitc->patches.offset;
            restart_function:
+               compute_framesize();
+               patch_alist(0);
                _jitc->again = 0;
                prolog(node);
                break;
                _jitc->again = 0;
                prolog(node);
                break;
@@ -2076,10 +2261,29 @@ _emit_code(jit_state_t *_jit)
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    temp->flag &= ~jit_flag_patch;
                    node = undo.node;
                    _jit->pc.w = undo.word;
+                   /* undo.func.self.aoff and undo.func.regset should not
+                    * be undone, as they will be further updated, and are
+                    * the reason of the undo. */
+                   undo.func.self.aoff = _jitc->function->frame +
+                       _jitc->function->self.aoff;
+                   undo.func.need_frame = _jitc->function->need_frame;
+                   jit_regset_set(&undo.func.regset, &_jitc->function->regset);
+                   /* allocar information also does not need to be undone */
+                   undo.func.aoffoff = _jitc->function->aoffoff;
+                   undo.func.allocar = _jitc->function->allocar;
+                   /* real stack framesize is not in the jit_function_t,
+                    * if it were, would need to not be undone  */
+                   /* cvt_offset must also not be undone */
+                   undo.func.cvt_offset = _jitc->function->cvt_offset;
+                   /* this will be recomputed but undo anyway to have it
+                    * better self documented.*/
+                   undo.func.need_stack = _jitc->function->need_stack;
+                   memcpy(_jitc->function, &undo.func, sizeof(undo.func));
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
 #if DEVEL_DISASSEMBLER
                    prevw = undo.prevw;
 #endif
                    _jitc->patches.offset = undo.patch_offset;
+                   patch_alist(1);
                    goto restart_function;
                }
                if (node->link &&
                    goto restart_function;
                }
                if (node->link &&
@@ -2103,11 +2307,23 @@ _emit_code(jit_state_t *_jit)
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
            case jit_code_live:                 case jit_code_ellipsis:
            case jit_code_va_push:
            case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+#  if __WORDSIZE == 64
+           case jit_code_arg_l:
+#  endif
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
            case jit_code_arg_f:                case jit_code_arg_d:
            case jit_code_va_end:
            case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+#if __WORDSIZE == 64
+           case jit_code_retr_ui:              case jit_code_reti_ui:
+           case jit_code_retr_l:               case jit_code_reti_l:
+#endif
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
            case jit_code_retr_f:               case jit_code_reti_f:
            case jit_code_retr_d:               case jit_code_reti_d:
            case jit_code_getarg_c:             case jit_code_getarg_uc:
@@ -2117,10 +2333,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
            case jit_code_getarg_ui:            case jit_code_getarg_l:
 #endif
            case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+#if __WORDSIZE == 64
+           case jit_code_putargr_ui:           case jit_code_putargi_ui:
+           case jit_code_putargr_l:            case jit_code_putargi_l:
+#endif
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
            case jit_code_putargr_f:            case jit_code_putargi_f:
            case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+#if __WORDSIZE == 64
+           case jit_code_pushargr_ui:          case jit_code_pushargi_ui:
+           case jit_code_pushargr_l:           case jit_code_pushargi_l:
+#endif
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
            case jit_code_pushargr_f:           case jit_code_pushargi_f:
            case jit_code_pushargr_d:           case jit_code_pushargi_d:
            case jit_code_retval_c:             case jit_code_retval_uc:
@@ -2182,7 +2414,7 @@ _emit_code(jit_state_t *_jit)
     for (offset = 0; offset < _jitc->patches.offset; offset++) {
        node = _jitc->patches.ptr[offset].node;
        word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
     for (offset = 0; offset < _jitc->patches.offset; offset++) {
        node = _jitc->patches.ptr[offset].node;
        word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
-       patch_at(node, _jitc->patches.ptr[offset].inst, word);
+       patch_at(_jitc->patches.ptr[offset].inst, word);
     }
 
     jit_flush(_jit->code.ptr, _jit->pc.uc);
     }
 
     jit_flush(_jit->code.ptr, _jit->pc.uc);
@@ -2231,6 +2463,26 @@ _emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
        sse_stxi_d(i0, rn(r0), rn(r1));
 }
 
        sse_stxi_d(i0, rn(r0), rn(r1));
 }
 
+static void
+_compute_framesize(jit_state_t *_jit)
+{
+    jit_int32_t                reg;
+    /* Recompute _jitc->framesize from the register set recorded for
+     * the current function (_jitc->function->regset).
+     *
+     * Save stack pointer in first slot: the size starts at one
+     * REAL_WORDSIZE and grows by one REAL_WORDSIZE for every entry
+     * of iregs[] whose bit is set in the regset.  When built with
+     * __X64 and either __CYGWIN__ or _WIN32, every entry of fregs[]
+     * whose bit is set additionally adds sizeof(jit_float64_t).
+     *
+     * Returns nothing; the result is left in _jitc->framesize. */
+    _jitc->framesize = REAL_WORDSIZE;
+    for (reg = 0; reg < jit_size(iregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, iregs[reg]))
+           _jitc->framesize += REAL_WORDSIZE;
+
+#if __X64 && (__CYGWIN__ || _WIN32)
+    for (reg = 0; reg < jit_size(fregs); reg++)
+       if (jit_regset_tstbit(&_jitc->function->regset, fregs[reg]))
+           _jitc->framesize += sizeof(jit_float64_t);
+#endif
+    /* Make sure functions called have a 16 byte aligned stack:
+     * first round the accumulated size up to a multiple of 16,
+     * then add (16 - REAL_WORDSIZE).
+     * NOTE(review): the extra (16 - REAL_WORDSIZE) presumably
+     * compensates for the return address already pushed by the
+     * caller -- confirm against prolog(). */
+    _jitc->framesize = (_jitc->framesize + 15) & -16;
+    _jitc->framesize += 16 - REAL_WORDSIZE;
+}
+
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
@@ -2256,6 +2508,7 @@ _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 static void
 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
     sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
 }
     x87_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
     sse_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
 }
@@ -2263,6 +2516,7 @@ _sse_from_x87_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
     sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
     x87_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
     sse_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
@@ -2270,6 +2524,7 @@ _sse_from_x87_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
     x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
 }
     sse_stxi_f(CVT_OFFSET, _RBP_REGNO, r1);
     x87_ldxi_f(r0, _RBP_REGNO, CVT_OFFSET);
 }
@@ -2277,6 +2532,7 @@ _x87_from_sse_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static void
 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
 static void
 _x87_from_sse_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+    CHECK_CVT_OFFSET();
     sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
     x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
     sse_stxi_d(CVT_OFFSET, _RBP_REGNO, r1);
     x87_ldxi_d(r0, _RBP_REGNO, CVT_OFFSET);
 }
index 49244b5..b0b0ef7 100644 (file)
@@ -1,5 +1,5 @@
 /*
 /*
- * Copyright (C) 2012-2022  Free Software Foundation, Inc.
+ * Copyright (C) 2012-2023  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
  *
  * This file is part of GNU lightning.
  *
@@ -227,8 +227,25 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
        for (regno = 0; regno < _jitc->reglen; regno++) {
            if ((jit_class(_rvs[regno].spec) & spec) == spec &&
                !jit_regset_tstbit(&_jitc->regarg, regno) &&
        for (regno = 0; regno < _jitc->reglen; regno++) {
            if ((jit_class(_rvs[regno].spec) & spec) == spec &&
                !jit_regset_tstbit(&_jitc->regarg, regno) &&
-               !jit_regset_tstbit(&_jitc->reglive, regno))
+               !jit_regset_tstbit(&_jitc->reglive, regno)) {
+               if (jit_regset_tstbit(&_jitc->regmask, regno)) {
+                   /* search further, attempting to find a truly known
+                   * free register, not just one in unknown state. */
+                   jit_int32_t regfree;
+
+                   for (regfree = regno + 1;
+                        regfree < _jitc->reglen; regfree++) {
+                       if ((jit_class(_rvs[regfree].spec) & spec) == spec &&
+                           !jit_regset_tstbit(&_jitc->regarg, regfree) &&
+                           !jit_regset_tstbit(&_jitc->reglive, regfree) &&
+                           !jit_regset_tstbit(&_jitc->regmask, regfree)) {
+                           regno = regfree;
+                           break;
+                       }
+                   }
+               }
                goto regarg;
                goto regarg;
+           }
        }
 
        /* search for a register matching spec that is not an argument
        }
 
        /* search for a register matching spec that is not an argument
@@ -874,6 +891,7 @@ jit_new_state(void)
     jit_regset_new(&_jitc->regsav);
     jit_regset_new(&_jitc->reglive);
     jit_regset_new(&_jitc->regmask);
     jit_regset_new(&_jitc->regsav);
     jit_regset_new(&_jitc->reglive);
     jit_regset_new(&_jitc->regmask);
+    jit_regset_new(&_jitc->explive);
 
     jit_init();
 
 
     jit_init();
 
@@ -1335,14 +1353,36 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
            mask = 0;
            break;
        case jit_code_live:     case jit_code_va_end:
            mask = 0;
            break;
        case jit_code_live:     case jit_code_va_end:
-       case jit_code_retr:     case jit_code_retr_f:   case jit_code_retr_d:
-       case jit_code_pushargr: case jit_code_pushargr_f:
+       case jit_code_retr_c:   case jit_code_retr_uc:
+       case jit_code_retr_s:   case jit_code_retr_us:
+       case jit_code_retr_i:   case jit_code_retr_ui:
+       case jit_code_retr_l:
+       case jit_code_retr_f:   case jit_code_retr_d:
+       case jit_code_pushargr_c:
+       case jit_code_pushargr_uc:
+       case jit_code_pushargr_s:
+       case jit_code_pushargr_us:
+       case jit_code_pushargr_i:
+       case jit_code_pushargr_ui:
+       case jit_code_pushargr_l:
+       case jit_code_pushargr_f:
        case jit_code_pushargr_d:
        case jit_code_finishr:  /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_reg;
            break;
        case jit_code_pushargr_d:
        case jit_code_finishr:  /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_reg;
            break;
-       case jit_code_align:    case jit_code_reti:     case jit_code_pushargi:
-       case jit_code_finishi:  /* synthesized will set jit_cc_a0_jmp */
+       case jit_code_align:    case jit_code_skip:
+       case jit_code_reti_c:   case jit_code_reti_uc:
+       case jit_code_reti_s:   case jit_code_reti_us:
+       case jit_code_reti_i:   case jit_code_reti_ui:
+       case jit_code_reti_l:
+       case jit_code_pushargi_c:
+       case jit_code_pushargi_uc:
+       case jit_code_pushargi_s:
+       case jit_code_pushargi_us:
+       case jit_code_pushargi_i:
+       case jit_code_pushargi_ui:
+       case jit_code_pushargi_l:
+        case jit_code_finishi: /* synthesized will set jit_cc_a0_jmp */
            mask = jit_cc_a0_int;
            break;
        case jit_code_reti_f:   case jit_code_pushargi_f:
            mask = jit_cc_a0_int;
            break;
        case jit_code_reti_f:   case jit_code_pushargi_f:
@@ -1354,7 +1394,9 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_allocai:
            mask = jit_cc_a0_int|jit_cc_a1_int;
            break;
        case jit_code_allocai:
            mask = jit_cc_a0_int|jit_cc_a1_int;
            break;
-       case jit_code_arg:      case jit_code_arg_f:    case jit_code_arg_d:
+       case jit_code_arg_c:    case jit_code_arg_s:
+       case jit_code_arg_i:    case jit_code_arg_l:
+       case jit_code_arg_f:    case jit_code_arg_d:
            mask = jit_cc_a0_int|jit_cc_a0_arg;
            break;
        case jit_code_calli:    case jit_code_jmpi:
            mask = jit_cc_a0_int|jit_cc_a0_arg;
            break;
        case jit_code_calli:    case jit_code_jmpi:
@@ -1378,11 +1420,17 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_getarg_f: case jit_code_getarg_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg;
            break;
        case jit_code_getarg_f: case jit_code_getarg_d:
            mask = jit_cc_a0_reg|jit_cc_a0_chg|jit_cc_a1_arg;
            break;
-       case jit_code_putargr:  case jit_code_putargr_f:
-       case jit_code_putargr_d:
+       case jit_code_putargr_c:case jit_code_putargr_uc:
+       case jit_code_putargr_s:case jit_code_putargr_us:
+       case jit_code_putargr_i:case jit_code_putargr_ui:
+       case jit_code_putargr_l:
+       case jit_code_putargr_f:case jit_code_putargr_d:
            mask = jit_cc_a0_reg|jit_cc_a1_arg;
            break;
            mask = jit_cc_a0_reg|jit_cc_a1_arg;
            break;
-       case jit_code_putargi:
+       case jit_code_putargi_c:case jit_code_putargi_uc:
+       case jit_code_putargi_s:case jit_code_putargi_us:
+       case jit_code_putargi_i:case jit_code_putargi_ui:
+       case jit_code_putargi_l:
            mask = jit_cc_a0_int|jit_cc_a1_arg;
            break;
        case jit_code_putargi_f:
            mask = jit_cc_a0_int|jit_cc_a1_arg;
            break;
        case jit_code_putargi_f:
@@ -1422,6 +1470,8 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
        case jit_code_negr_d:   case jit_code_absr_d:   case jit_code_sqrtr_d:
        case jit_code_movr_d:   case jit_code_extr_d:   case jit_code_extr_f_d:
        case jit_code_ldr_d:
        case jit_code_negr_d:   case jit_code_absr_d:   case jit_code_sqrtr_d:
        case jit_code_movr_d:   case jit_code_extr_d:   case jit_code_extr_f_d:
        case jit_code_ldr_d:
+       case jit_code_clor:     case jit_code_clzr:
+       case jit_code_ctor:     case jit_code_ctzr:
        case jit_code_movr_w_f: case jit_code_movr_f_w:
        case jit_code_movr_w_d: case jit_code_movr_d_w:
        case jit_code_va_arg:   case jit_code_va_arg_d:
        case jit_code_movr_w_f: case jit_code_movr_f_w:
        case jit_code_movr_w_d: case jit_code_movr_d_w:
        case jit_code_va_arg:   case jit_code_va_arg_d:
@@ -1648,8 +1698,14 @@ _do_setup(jit_state_t *_jit)
      * at the start of a basic block */
     for (offset = 0; offset < _jitc->blocks.offset; offset++) {
        block = _jitc->blocks.ptr + offset;
      * at the start of a basic block */
     for (offset = 0; offset < _jitc->blocks.offset; offset++) {
        block = _jitc->blocks.ptr + offset;
-       if (!block->label || block->label->code == jit_code_epilog)
+       if (!block->label)
            continue;
            continue;
+       if (block->label->code == jit_code_epilog) {
+           jit_regset_setbit(&block->reglive, JIT_RET);
+           jit_regset_setbit(&block->reglive, JIT_FRET);
+           jit_regset_com(&block->regmask, &block->reglive);
+           continue;
+       }
        jit_setup(block);
     }
 }
        jit_setup(block);
     }
 }
@@ -1750,7 +1806,7 @@ _check_block_again(jit_state_t *_jit)
     }
     while (todo);
 
     }
     while (todo);
 
-    return (1);
+    return (todo);
 }
 
 static void
 }
 
 static void
@@ -1781,6 +1837,7 @@ _jit_optimize(jit_state_t *_jit)
     jit_node_t         *node;
     jit_block_t                *block;
     jit_word_t          offset;
     jit_node_t         *node;
     jit_block_t                *block;
     jit_word_t          offset;
+    jit_regset_t        regmask;
 
     todo = 0;
     _jitc->function = NULL;
 
     todo = 0;
     _jitc->function = NULL;
@@ -1795,15 +1852,31 @@ _jit_optimize(jit_state_t *_jit)
     if (simplify())
        todo = 1;
 
     if (simplify())
        todo = 1;
 
-    /* Figure out labels that are only reached with a jump
-     * and is required to do a simple redundant_store removal
-     * on jit_beqi below */
+    jit_regset_set_ui(&regmask, 0);
+    for (offset = 0; offset < _jitc->reglen; offset++) {
+       if ((jit_class(_rvs[offset].spec) & (jit_class_gpr|jit_class_fpr)) &&
+           (jit_class(_rvs[offset].spec) & jit_class_sav) == jit_class_sav)
+           jit_regset_setbit(&regmask, offset);
+    }
+
+    /* Figure out labels that are only reached with a jump */
     jump = 1;
     for (node = _jitc->head; node; node = node->next) {
        switch (node->code) {
            case jit_code_label:
     jump = 1;
     for (node = _jitc->head; node; node = node->next) {
        switch (node->code) {
            case jit_code_label:
-               if (!jump)
+               if (!jump) {
                    node->flag |= jit_flag_head;
                    node->flag |= jit_flag_head;
+                   if (!node->link) {
+                       /* Block is dead code or only reachable through
+                        * an indirect jump. In that case we must assume
+                        * all callee-saved registers are live. */
+                       block = _jitc->blocks.ptr + node->v.w;
+                       jit_regset_ior(&block->reglive,
+                                      &block->reglive, &regmask);
+                       /* Cleanup regmask */
+                       block_update_set(block, block);
+                   }
+               }
                break;
            case jit_code_jmpi:         case jit_code_jmpr:
            case jit_code_epilog:
                break;
            case jit_code_jmpi:         case jit_code_jmpr:
            case jit_code_epilog:
@@ -1932,6 +2005,10 @@ _jit_reglive(jit_state_t *_jit, jit_node_t *node)
        case jit_code_label:    case jit_code_prolog:   case jit_code_epilog:
            block = _jitc->blocks.ptr + node->v.w;
            jit_regset_set(&_jitc->reglive, &block->reglive);
        case jit_code_label:    case jit_code_prolog:   case jit_code_epilog:
            block = _jitc->blocks.ptr + node->v.w;
            jit_regset_set(&_jitc->reglive, &block->reglive);
+           jit_regset_set_ui(&_jitc->explive, 0);
+           break;
+       case jit_code_live:
+           jit_regset_setbit(&_jitc->explive, node->u.w);
            break;
        case jit_code_callr:
            value = jit_regno(node->u.w);
            break;
        case jit_code_callr:
            value = jit_regno(node->u.w);
@@ -2043,6 +2120,19 @@ _jit_regarg_set(jit_state_t *_jit, jit_node_t *node, jit_int32_t value)
        else
            jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
     }
        else
            jit_regset_setbit(&_jitc->regarg, jit_regno(node->w.w));
     }
+    /* Prevent incorrect detection of running out of registers
+     * when the jump will need to be patched and all registers
+     * have been used in the current block. */
+    if (node->code == jit_code_jmpi && (node->flag & jit_flag_node)) {
+       jit_node_t      *label = node->u.n;
+       jit_block_t     *block = _jitc->blocks.ptr + label->v.w;
+       jit_regset_set(&_jitc->reglive, &block->reglive);
+       jit_regset_set(&_jitc->regmask, &block->regmask);
+       if (jit_regset_set_p(&_jitc->explive)) {
+           jit_regset_ior(&_jitc->reglive, &block->reglive, &_jitc->explive);
+           jit_regset_xor(&_jitc->regmask, &_jitc->regmask, &_jitc->explive);
+       }
+    }
 }
 
 void
 }
 
 void
@@ -2244,7 +2334,7 @@ _jit_emit(jit_state_t *_jit)
 #else
     if (!_jit->user_code) {
        mmap_prot = PROT_READ | PROT_WRITE;
 #else
     if (!_jit->user_code) {
        mmap_prot = PROT_READ | PROT_WRITE;
-#if !__OpenBSD__
+#if !(__OpenBSD__ || __APPLE__)
        mmap_prot |= PROT_EXEC;
 #endif
 #if __NetBSD__
        mmap_prot |= PROT_EXEC;
 #endif
 #if __NetBSD__
@@ -2307,8 +2397,7 @@ _jit_emit(jit_state_t *_jit)
 #  endif
 #else
            _jit->code.ptr = mmap(NULL, length,
 #  endif
 #else
            _jit->code.ptr = mmap(NULL, length,
-                                 PROT_EXEC | PROT_READ | PROT_WRITE,
-                                 MAP_PRIVATE | MAP_ANON, mmap_fd, 0);
+                                 mmap_prot, mmap_flags, mmap_fd, 0);
 #endif
 
            assert(_jit->code.ptr != MAP_FAILED);
 #endif
 
            assert(_jit->code.ptr != MAP_FAILED);
@@ -2340,12 +2429,12 @@ _jit_emit(jit_state_t *_jit)
        assert(result == 0);
     }
     if (!_jit->user_code) {
        assert(result == 0);
     }
     if (!_jit->user_code) {
-       length = _jit->pc.uc - _jit->code.ptr;
+       _jit->code.protected = _jit->pc.uc - _jit->code.ptr;
 #  if __riscv && __WORDSIZE == 64
        /* FIXME should start adding consts at a page boundary */
 #  if __riscv && __WORDSIZE == 64
        /* FIXME should start adding consts at a page boundary */
-       length -= _jitc->consts.hash.count * sizeof(jit_word_t);
+       _jit->code.protected -= _jitc->consts.hash.count * sizeof(jit_word_t);
 #  endif
 #  endif
-       result = mprotect(_jit->code.ptr, length, PROT_READ | PROT_EXEC);
+       result = mprotect(_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC);
        assert(result == 0);
     }
 #endif /* HAVE_MMAP */
        assert(result == 0);
     }
 #endif /* HAVE_MMAP */
@@ -2355,6 +2444,32 @@ fail:
     return (NULL);
 }
 
     return (NULL);
 }
 
+void
+_jit_protect(jit_state_t *_jit)
+{ /* Set the first code.protected bytes starting at code.ptr to
+   * PROT_READ | PROT_EXEC with mprotect().  Nothing is changed when
+   * user_code is set; a build without HAVE_MMAP only asserts that
+   * user_code is set. */
+#if !HAVE_MMAP
+  assert (_jit->user_code);
+#else
+  int result;
+  if (_jit->user_code) return; /* presumably a caller-supplied buffer: its protection is left alone */
+  result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_EXEC);
+  assert (result == 0); /* an mprotect() failure is only noticed when assert() is enabled */
+#endif
+}
+
+void
+_jit_unprotect(jit_state_t *_jit)
+{ /* Set the first code.protected bytes starting at code.ptr to
+   * PROT_READ | PROT_WRITE with mprotect(); PROT_EXEC is not requested,
+   * so the range is not asked to be executable after this call.
+   * Nothing is changed when user_code is set; a build without
+   * HAVE_MMAP only asserts that user_code is set. */
+#if !HAVE_MMAP
+  assert (_jit->user_code);
+#else
+  int result;
+  if (_jit->user_code) return; /* presumably a caller-supplied buffer: its protection is left alone */
+  result = mprotect (_jit->code.ptr, _jit->code.protected, PROT_READ | PROT_WRITE);
+  assert (result == 0); /* an mprotect() failure is only noticed when assert() is enabled */
+#endif
+}
+
 void
 _jit_frame(jit_state_t *_jit, jit_int32_t frame)
 {
 void
 _jit_frame(jit_state_t *_jit, jit_int32_t frame)
 {
@@ -2786,6 +2901,9 @@ _jit_update(jit_state_t *_jit, jit_node_t *node,
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
                        jit_regset_ior(live, live, mask);
                         * to jump to unknown location. */
                        /* Treat all callee save as live. */
                        jit_regset_ior(live, live, mask);
+                       /* Prevent registers explicitly marked as live
+                        * from being used as a temporary for the jmpi. */
+                       jit_regset_ior(live, live, &_jitc->explive);
                        /* Treat anything else as dead. */
                        return;
                    }
                        /* Treat anything else as dead. */
                        return;
                    }
@@ -2853,7 +2971,10 @@ _sequential_labels(jit_state_t *_jit)
                    if ((jump = node->link)) {
                        for (; jump; jump = link) {
                            link = jump->link;
                    if ((jump = node->link)) {
                        for (; jump; jump = link) {
                            link = jump->link;
-                           jump->u.n = prev;
+                           if (jump->code == jit_code_movi)
+                               jump->v.n = prev;
+                           else
+                               jump->u.n = prev;
                            jump->link = prev->link;
                            prev->link = jump;
                        }
                            jump->link = prev->link;
                            prev->link = jump;
                        }
@@ -2867,7 +2988,10 @@ _sequential_labels(jit_state_t *_jit)
                if ((jump = next->link)) {
                    for (; jump; jump = link) {
                        link = jump->link;
                if ((jump = next->link)) {
                    for (; jump; jump = link) {
                        link = jump->link;
-                       jump->u.n = node;
+                       if (jump->code == jit_code_movi)
+                           jump->v.n = node;
+                       else
+                           jump->u.n = node;
                        jump->link = node->link;
                        node->link = jump;
                    }
                        jump->link = node->link;
                        node->link = jump;
                    }
@@ -3022,7 +3146,6 @@ _redundant_jump(jit_state_t *_jit, jit_node_t *prev, jit_node_t *node)
                }
                break;
            case jit_code_name:         case jit_code_note:
                }
                break;
            case jit_code_name:         case jit_code_note:
-           case jit_code_align:
                break;
            default:
                return (0);
                break;
            default:
                return (0);
@@ -3073,7 +3196,7 @@ reverse_jump_code(jit_code_t code)
        case jit_code_bgti_f:   return (jit_code_bunlei_f);
 
        case jit_code_bner_f:   return (jit_code_beqr_f);
        case jit_code_bgti_f:   return (jit_code_bunlei_f);
 
        case jit_code_bner_f:   return (jit_code_beqr_f);
-       case jit_code_bnei_f:   return (jit_code_beqr_f);
+       case jit_code_bnei_f:   return (jit_code_beqi_f);
 
        case jit_code_bunltr_f: return (jit_code_bger_f);
        case jit_code_bunlti_f: return (jit_code_bgei_f);
 
        case jit_code_bunltr_f: return (jit_code_bger_f);
        case jit_code_bunlti_f: return (jit_code_bgei_f);
@@ -3860,6 +3983,9 @@ static maybe_unused void
 generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
 #endif
 
 generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1);
 #endif
 
+#define patch_alist(revert)            _patch_alist(_jit, revert)
+static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert);
+
 #if defined(__i386__) || defined(__x86_64__)
 #  include "jit_x86.c"
 #elif defined(__mips__)
 #if defined(__i386__) || defined(__x86_64__)
 #  include "jit_x86.c"
 #elif defined(__mips__)
@@ -3929,3 +4055,40 @@ generic_bswapr_ul(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     jit_unget_reg(reg);
 }
 #endif
     jit_unget_reg(reg);
 }
 #endif
+
+#if defined(stack_framesize)
+static maybe_unused void
+_patch_alist(jit_state_t *_jit, jit_bool_t revert)
+{ /* Walk the current function's alist (chained through node->link) and
+   * subtract jit_diffsize() from one operand of every listed node:
+   * field w of ldxi_* nodes and field u of stxi_* nodes.  When revert
+   * is non-zero the value is negated first, so the same amount is
+   * added back instead.  Nothing is touched when jit_diffsize() is 0.
+   * Any other node code found on the list calls abort(). */
+    jit_int32_t                 diff;
+    jit_node_t         *node;
+    diff = jit_diffsize(); /* NOTE(review): defined outside this hunk; presumably a frame-size delta -- confirm */
+    if (diff) {
+       if (revert)
+           diff = -diff; /* undo a previous adjustment */
+       for (node = _jitc->function->alist; node; node = node->link) {
+           switch (node->code) {
+               case jit_code_ldxi_c:   case jit_code_ldxi_uc:
+               case jit_code_ldxi_s:   case jit_code_ldxi_us:
+               case jit_code_ldxi_i:
+#if __WORDSIZE == 64
+               case jit_code_ldxi_ui:  case jit_code_ldxi_l:
+#endif
+               case jit_code_ldxi_f:   case jit_code_ldxi_d:
+                   node->w.w -= diff; /* presumably the load's immediate offset -- TODO confirm */
+                   break;
+               case jit_code_stxi_c:   case jit_code_stxi_s:
+               case jit_code_stxi_i:
+#if __WORDSIZE == 64
+               case jit_code_stxi_l:
+#endif
+               case jit_code_stxi_f:   case jit_code_stxi_d:
+                   node->u.w -= diff; /* presumably the store's immediate offset -- TODO confirm */
+                   break;
+               default:
+                   abort(); /* only the load/store codes above are handled */
+           }
+       }
+    }
+}
+#endif
index 1728fb2..1f31ed6 100644 (file)
@@ -68,14 +68,6 @@ main(int argc, char *argv[])
 #  else
     fprintf(fp, "#if !defined(__ARM_PCS_VFP)\n");
 #  endif
 #  else
     fprintf(fp, "#if !defined(__ARM_PCS_VFP)\n");
 #  endif
-#elif defined(__mips__)
-#  if __WORDSIZE == 32
-#    if NEW_ABI
-    fprintf(fp, "#if NEW_ABI\n");
-#    else
-    fprintf(fp, "#if !NEW_ABI\n");
-#    endif
-#  endif
 #elif defined(__powerpc__)
     fprintf(fp, "#if defined(__powerpc__)\n");
     fprintf(fp, "#if __BYTE_ORDER == %s\n",
 #elif defined(__powerpc__)
     fprintf(fp, "#if defined(__powerpc__)\n");
     fprintf(fp, "#if __BYTE_ORDER == %s\n",
@@ -94,10 +86,6 @@ main(int argc, char *argv[])
        fprintf(fp, "    %d,    /* %s */\n", _szs[offset], code_name[offset]);
 #if defined(__arm__)
     fprintf(fp, "#endif /* __ARM_PCS_VFP */\n");
        fprintf(fp, "    %d,    /* %s */\n", _szs[offset], code_name[offset]);
 #if defined(__arm__)
     fprintf(fp, "#endif /* __ARM_PCS_VFP */\n");
-#elif defined(__mips__)
-#  if __WORDSIZE == 32
-    fprintf(fp, "#endif /* NEW_ABI */\n");
-#  endif
 #elif defined(__powerpc__)
 #  if __WORDSIZE == 32
     fprintf(fp, "#endif /* "
 #elif defined(__powerpc__)
 #  if __WORDSIZE == 32
     fprintf(fp, "#endif /* "