git subrepo pull --force --remote=https://git.savannah.gnu.org/git/lightning.git...
authorPaul Cercueil <paul@crapouillou.net>
Fri, 1 Nov 2024 23:32:24 +0000 (00:32 +0100)
committerPaul Cercueil <paul@crapouillou.net>
Fri, 1 Nov 2024 23:32:47 +0000 (00:32 +0100)
subrepo:
  subdir:   "deps/lightning"
  merged:   "808fdde9e8"
upstream:
  origin:   "https://git.savannah.gnu.org/git/lightning.git"
  branch:   "master"
  commit:   "808fdde9e8"
git-subrepo:
  version:  "0.4.6"
  origin:   "https://github.com/ingydotnet/git-subrepo.git"
  commit:   "110b9eb"

65 files changed:
deps/lightning/.gitmodules
deps/lightning/.gitrepo
deps/lightning/ChangeLog
deps/lightning/TODO
deps/lightning/check/Makefile.am
deps/lightning/check/all.tst
deps/lightning/check/carry.tst
deps/lightning/check/cldstxba.c [new file with mode: 0644]
deps/lightning/check/float.tst
deps/lightning/check/ldstxbai.ok [new file with mode: 0644]
deps/lightning/check/ldstxbai.tst [new file with mode: 0644]
deps/lightning/check/ldstxbar.ok [new file with mode: 0644]
deps/lightning/check/ldstxbar.tst [new file with mode: 0644]
deps/lightning/check/lightning.c
deps/lightning/configure.ac
deps/lightning/doc/body.texi
deps/lightning/gnulib [new submodule]
deps/lightning/include/lightning.h.in
deps/lightning/include/lightning/Makefile.am
deps/lightning/include/lightning/jit_arm.h
deps/lightning/include/lightning/jit_ppc.h
deps/lightning/include/lightning/jit_private.h
deps/lightning/include/lightning/jit_sh.h [new file with mode: 0644]
deps/lightning/lib/Makefile.am
deps/lightning/lib/jit_aarch64-cpu.c
deps/lightning/lib/jit_aarch64-fpu.c
deps/lightning/lib/jit_aarch64-sz.c
deps/lightning/lib/jit_aarch64.c
deps/lightning/lib/jit_alpha-sz.c
deps/lightning/lib/jit_alpha.c
deps/lightning/lib/jit_arm-cpu.c
deps/lightning/lib/jit_arm-sz.c
deps/lightning/lib/jit_arm-vfp.c
deps/lightning/lib/jit_arm.c
deps/lightning/lib/jit_disasm.c
deps/lightning/lib/jit_fallback.c
deps/lightning/lib/jit_hppa-sz.c
deps/lightning/lib/jit_hppa.c
deps/lightning/lib/jit_ia64-sz.c
deps/lightning/lib/jit_ia64.c
deps/lightning/lib/jit_loongarch-sz.c
deps/lightning/lib/jit_loongarch.c
deps/lightning/lib/jit_mips-fpu.c
deps/lightning/lib/jit_mips-sz.c
deps/lightning/lib/jit_mips.c
deps/lightning/lib/jit_names.c
deps/lightning/lib/jit_ppc-cpu.c
deps/lightning/lib/jit_ppc-fpu.c
deps/lightning/lib/jit_ppc-sz.c
deps/lightning/lib/jit_ppc.c
deps/lightning/lib/jit_riscv-sz.c
deps/lightning/lib/jit_riscv.c
deps/lightning/lib/jit_s390-sz.c
deps/lightning/lib/jit_s390.c
deps/lightning/lib/jit_sh-cpu.c [new file with mode: 0644]
deps/lightning/lib/jit_sh-fpu.c [new file with mode: 0644]
deps/lightning/lib/jit_sh-sz.c [new file with mode: 0644]
deps/lightning/lib/jit_sh.c [new file with mode: 0644]
deps/lightning/lib/jit_size.c
deps/lightning/lib/jit_sparc-sz.c
deps/lightning/lib/jit_sparc.c
deps/lightning/lib/jit_x86-cpu.c
deps/lightning/lib/jit_x86-sz.c
deps/lightning/lib/jit_x86.c
deps/lightning/lib/lightning.c

index e69de29..acb2669 100644 (file)
@@ -0,0 +1,3 @@
+[submodule "gnulib"]
+       path = gnulib
+       url = git://git.sv.gnu.org/gnulib.git
index 55cc9e6..2535adc 100644 (file)
@@ -4,9 +4,9 @@
 ; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
 ;
 [subrepo]
-       remote = https://github.com/pcercuei/gnu_lightning.git
-       branch = pcsx_rearmed
-       commit = de026794c71386983034461bce2df3c63ccd5827
-       parent = fb67ea334b0f3984a114a6e306806a56347a83ba
+       remote = https://git.savannah.gnu.org/git/lightning.git
+       branch = master
+       commit = 808fdde9e81cc1f43fd3ef3b01d24744c18bc123
+       parent = dde06e44db790da43b379ff3ef74accb15c3586e
        method = merge
        cmdver = 0.4.6
index 8a5588a..5d107f2 100644 (file)
@@ -1,3 +1,40 @@
+2024-01-24 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: Add new ldstxbar test.
+       * check/all.tst: Add simple code to disassemble new codes.
+       * check/lightning.c: Add logic to call the new codes.
+       * doc/body.texi: Document the new codes and remove note about
+       only an immediate displacement argument supported.
+       * include/lightning.h.in: Add the new {ld,st}x{b,a}r_* codes.
+       * lib/jit_names.c: Add debug string for the new codes.
+       * lib/lightning.c: Implement fallbacks for the new codes.
+       * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha.c,
+       lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c,
+       lib/jit_loongarch-sz.c, lib/jit_loongarch.c, lib/jit_mips-sz.c,
+       lib/jit_mips.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_riscv-sz.c,
+       lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc-sz.c, lib/jit_sparc.c,
+       lib/jit_x86-sz.c, lib/jit_x86.c: Implement the new increment load
+       and store codes with a register displacement.
+
+2023-12-22 Paulo Andrade <pcpa@gnu.org>
+
+       * check/Makefile.am: Add new ldstxbai test.
+       * check/all.tst: Add simple code to disassemble new codes.
+       * check/lightning.c: Add logic to call the new codes.
+       * doc/body.texi: Document the new codes.
+       * include/lightning.h.in: Add the new {ld,st}x{b,a}i_* codes.
+       * include/lightning/jit_private.h: Add jit_cc_a1_dep to tell
+       the instruction has argument one used as input and output.
+       * lib/lightning.c: Implement fallbacks for the new codes.
+       * lib/jit_names.c: Add debug string for the new codes.
+       * lib/jit_aarch64-sz.c, lib/jit_aarch64.c, lib/jit_alpha.c,
+       lib/jit_arm-sz.c, lib/jit_arm.c, lib/jit_hppa.c, lib/jit_ia64.c,
+       lib/jit_loongarch-sz.c, lib/jit_loongarch.c, lib/jit_mips-sz.c,
+       lib/jit_mips.c, lib/jit_ppc-sz.c, lib/jit_ppc.c, lib/jit_riscv-sz.c,
+       lib/jit_riscv.c, lib/jit_s390.c, lib/jit_sparc-sz.c, lib/jit_sparc.c,
+       lib/jit_x86-sz.c, lib/jit_x86.c: Implement the new increment load
+       and store codes.
+
 2023-08-21 Paulo Andrade <pcpa@gnu.org>
 
        * check/Makefile.am, check/lightning.c: Add new hmul tests.
index 8b13789..72a0d02 100644 (file)
@@ -1 +1,2 @@
-
+o Use PC relative load/store in aarch64
+o Check post-index in real arm hardware
index 1f086ef..d43549d 100644 (file)
@@ -18,7 +18,7 @@ AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include \
        -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 
 check_PROGRAMS = lightning ccall self setcode nodata ctramp carg cva_list \
-       catomic protect riprel cbit callee
+       catomic protect riprel cbit callee cldstxba
 
 lightning_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 lightning_SOURCES = lightning.c
@@ -67,7 +67,11 @@ cbit_SOURCES = cbit.c
 callee_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
 callee_SOURCES = callee.c
 
+cldstxba_LDADD = $(top_builddir)/lib/liblightning.la -lm $(SHLIB)
+cldstxba_SOURCES = cldstxba.c
+
 noinst_PROGRAMS = gen_cbit
+gen_cgit_LDADD =
 gen_cbit_SOURCES = gen_cbit.c
 
 cbit.c:                gen_cbit
@@ -94,6 +98,8 @@ EXTRA_DIST =                          \
        ldstr-c.tst     ldstr-c.ok      \
        ldstxr-c.tst    ldstxr-c.ok     \
        ldstxi-c.tst    ldstxi-c.ok     \
+       ldstxbai.tst    ldstxbai.ok     \
+       ldstxbar.tst    ldstxbar.ok     \
        ext.tst         ext.ok          \
        cvt.tst         cvt.ok          \
        hton.tst        hton.ok         \
@@ -162,8 +168,9 @@ base_TESTS =                                \
        ldstr ldsti                     \
        ldstxr ldstxi                   \
        ldstr-c ldstxr-c ldstxi-c       \
-       ext cvt hton bswap branch       \
-       alu_add alux_add                \
+       ldstxbai ldstxbar               \
+       ext cvt hton bswap              \
+       branch alu_add alux_add         \
        alu_sub alux_sub alu_rsb        \
        alu_mul alu_hmul                \
        alu_div alu_rem                 \
@@ -193,6 +200,7 @@ x87_TESTS =                                 \
        rpn.x87 ldstr.x87 ldsti.x87             \
        ldstxr.x87 ldstxi.x87                   \
        ldstr-c.x87 ldstxr-c.x87 ldstxi-c.x87   \
+       ldstxbai.x87 ldstxbar.x87               \
        ext.x87 cvt.x87 branch.x87              \
        alu_add.x87 alux_add.x87                \
        alu_sub.x87 alux_sub.x87 alu_rsb.x87    \
@@ -245,6 +253,7 @@ arm_TESTS =                                 \
        rpn.arm ldstr.arm ldsti.arm             \
        ldstxr.arm ldstxi.arm                   \
        ldstr-c.arm ldstxr-c.arm ldstxi-c.arm   \
+       ldstxbai.arm ldstxbar.arm               \
        ext.arm cvt.arm hton.arm bswap.arm      \
        branch.arm alu_add.arm alux_add.arm     \
        alu_sub.arm alux_sub.arm alu_rsb.arm    \
@@ -273,6 +282,7 @@ swf_TESTS =                                 \
        rpn.swf ldstr.swf ldsti.swf             \
        ldstxr.swf ldstxi.swf                   \
        ldstr-c.swf ldstxr-c.swf ldstxi-c.swf   \
+       ldstxbai.swf ldstxbar.swf               \
        ext.swf cvt.swf hton.swf bswap.swf      \
        branch.swf alu_add.swf alux_add.swf     \
        alu_sub.swf alux_sub.swf alu_rsb.swf    \
@@ -299,6 +309,7 @@ arm_swf_TESTS =                                                     \
        rpn.arm.swf ldstr.arm.swf ldsti.arm.swf                 \
        ldstxr.arm.swf ldstxi.arm.swf                           \
        ldstr-c.arm.swf ldstxr-c.arm.swf ldstxi-c.arm.swf       \
+       ldstxbai.arm.swf ldstxbar.arm.swf                       \
        ext.arm.swf cvt.arm.swf hton.arm.swf bswap.arm.swf      \
        branch.arm.swf alu_add.arm.swf alux_add.arm.swf         \
        alu_sub.arm.swf alux_sub.arm.swf alu_rsb.arm.swf        \
@@ -325,6 +336,7 @@ arm4_swf_TESTS =                                            \
        rpn.arm4.swf ldstr.arm4.swf ldsti.arm4.swf              \
        ldstxr.arm4.swf ldstxi.arm4.swf                         \
        ldstr-c.arm4.swf ldstxr-c.arm4.swf ldstxi-c.arm4.swf    \
+       ldstxbai.arm4.swf ldstxbar.arm4.swf                     \
        ext.arm4.swf cvt.arm4.swf hton.arm4.swf bswap.arm4.swf  \
        branch.arm4.swf alu_add.arm4.swf alux_add.arm4.swf      \
        alu_sub.arm4.swf alux_sub.arm4.swf alu_rsb.arm4.swf     \
@@ -354,6 +366,7 @@ nodata_TESTS =                                              \
        rpn.nodata ldstr.nodata ldsti.nodata            \
        ldstxr.nodata ldstxi.nodata                     \
        ldstr-c.nodata ldstxr-c.nodata ldstxi-c.nodata  \
+       ldstxbai.nodata ldstxbar.nodata                 \
        ext.nodata cvt.nodata branch.nodata             \
        alu_add.nodata alux_add.nodata                  \
        alu_sub.nodata alux_sub.nodata alu_rsb.nodata   \
index 2257ac6..a63e489 100644 (file)
        ldxr_l %r0 %r1 %r2
        ldxi_l %r0 %r1 8
 #endif
+       ldxbr_c %r0 %r1 %r2
+       ldxbi_c %r0 %r1 1
+       ldxar_c %r0 %r1 %r2
+       ldxai_c %r0 %r1 1
+       ldxbr_uc %r0 %r1 %r2
+       ldxbi_uc %r0 %r1 1
+       ldxar_uc %r0 %r1 %r2
+       ldxai_uc %r0 %r1 1
+       ldxbr_s %r0 %r1 %r2
+       ldxbi_s %r0 %r1 2
+       ldxar_s %r0 %r1 %r2
+       ldxai_s %r0 %r1 2
+       ldxbr_us %r0 %r1 %r2
+       ldxbi_us %r0 %r1 2
+       ldxar_us %r0 %r1 %r2
+       ldxai_us %r0 %r1 2
+       ldxbr_i %r0 %r1 %r2
+       ldxbi_i %r0 %r1 4
+       ldxar_i %r0 %r1 %r2
+       ldxai_i %r0 %r1 4
+#if __WORDSIZE == 64
+       ldxbr_ui %r0 %r1 %r2
+       ldxbi_ui %r0 %r1 4
+       ldxar_ui %r0 %r1 %r2
+       ldxai_ui %r0 %r1 4
+       ldxbi_l %r0 %r1 8
+       ldxbr_l %r0 %r1 %r2
+       ldxai_l %r0 %r1 8
+#endif
+       ldxbr_f %f0 %r1 %r2
+       ldxbi_f %f0 %r1 4
+       ldxar_f %f0 %r1 %r2
+       ldxai_f %f0 %r1 4
+       ldxbr_d %f0 %r1 %r2
+       ldxbi_d %f0 %r1 8
+       ldxar_d %f0 %r1 %r2
+       ldxai_d %f0 %r1 8
        str_c %r1 %r0
        sti_c 0x80000000 %r1
        str_s %r1 %r0
        stxr_l %r2 %r1 %r0
        stxi_l 8 %r1 %r0
 #endif
+       stxbr_c %r2 %r1 %r0
+       stxbi_c 1 %r1 %r0
+       stxar_c %r2 %r1 %r0
+       stxai_c 1 %r1 %r0
+       stxbr_s %r2 %r1 %r0
+       stxbi_s 2 %r1 %r0
+       stxar_s %r2 %r1 %r0
+       stxai_s 2 %r1 %r0
+       stxbr_i %r2 %r1 %r0
+       stxbi_i 4 %r1 %r0
+       stxar_i %r2 %r1 %r0
+       stxai_i 4 %r1 %r0
+#if __WORDSIZE == 64
+       stxbr_l %r2 %r1 %r0
+       stxbi_l 8 %r1 %r0
+       stxar_l %r2 %r1 %r0
+       stxai_l 8 %r1 %r0
+#endif
+       stxbr_f %r2 %r0 %f0
+       stxbi_f 4 %r0 %f0
+       stxar_f %r2 %r0 %f0
+       stxai_f 4 %r0 %f0
+       stxbr_d %r2 %r0 %f0
+       stxbi_d 8 %r0 %f0
+       stxar_d %r2 %r0 %f0
+       stxai_d 8 %r0 %f0
 cond:
        bltr cond %r0 %r1
 condi:
index 180d896..615d964 100644 (file)
@@ -136,6 +136,8 @@ ok:
        fadd(__LINE__,   , r0, r1, x7fe, x1, x7f)
        tsub(__LINE__,   , r0, r1, x80,  x1, x7f)
        fsub(__LINE__,   , r0, r1, x7f,  x1, x7fe)
+       fsub(__LINE__,   , r0, r1, x0,   x1, ff)
+       fsub(__LINE__,   , r0, r1, ff,   ff, x0)
        tadd(__LINE__, _u, r0, r1, ff,   x1, x0)
        fadd(__LINE__, _u, r0, r1, x7f,  x1, x80)
        tsub(__LINE__, _u, r0, r1, x0,   x1, ff)
diff --git a/deps/lightning/check/cldstxba.c b/deps/lightning/check/cldstxba.c
new file mode 100644 (file)
index 0000000..c9dd6e8
--- /dev/null
@@ -0,0 +1,224 @@
+#include <lightning.h>
+#include <stdio.h>
+
+#if !defined(offsetof)
+#  define offsetof(type, field) ((char *)&((type *)0)->field - (char *)0)
+#endif
+
+int
+main(int argc, char *argv[])
+{   /* Self-checking test of the post-increment (*xai*) load/store codes: the generated code prints "ok" or calls abort(). */
+    jit_state_t                *_jit;
+    jit_node_t         *jmp, *fail;    /* jmp: branch currently being patched; fail: shared failure label */
+    void               (*code)(void);  /* entry point returned by jit_emit() */
+#if defined(__x86_64__) || defined(__i386__)
+    /* test lodsb stosb special cases (value in RAX, store base in RDI, load base in RSI) */
+    struct data_t {
+       signed char     sc;
+       unsigned char   uc;
+       signed short    ss;
+       unsigned short  us;
+       signed int      si;
+       unsigned int    ui;
+       unsigned long long ul;  /* must be 8 bytes: accessed with jit_stxai_l/jit_ldxai_l and an increment of 8, which would overrun a 32-bit long (LLP64) */
+    } data;
+
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+    jit_prolog();
+    fail = jit_forward();      /* forward label, bound by jit_link(fail) below */
+
+#define SC_VAL         -3
+    jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, sc));
+    jit_movi(_RAX, SC_VAL);
+    jit_movr(_RSI, _RDI);      /* keep the original address to measure the increment */
+    jit_stxai_c(1, _RDI, _RAX);
+    jit_subr(_RDI, _RDI, _RSI);        /* RDI = address after the store - original address */
+    jmp = jit_bnei(_RDI, 1);   /* the base register must have advanced by exactly 1 */
+    jit_patch_at(jmp, fail);
+    data.uc = 0xa3;    /* written from C while building the code; checked by the jit_ldxai_uc test below */
+
+#define SS_VAL         -31
+    jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, ss));
+    jit_movi(_RAX, SS_VAL);
+    jit_movr(_RSI, _RDI);
+    jit_stxai_s(2, _RDI, _RAX);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, 2);   /* expected increment: 2 */
+    jit_patch_at(jmp, fail);
+    data.us = 0x5aa5;
+
+#define SI_VAL         -511
+    jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, si));
+    jit_movi(_RAX, SI_VAL);
+    jit_movr(_RSI, _RDI);
+    jit_stxai_i(4, _RDI, _RAX);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, 4);   /* expected increment: 4 */
+    jit_patch_at(jmp, fail);
+    data.ui = 0xabcddcba;
+
+#  if __X64 && !__X64_32
+#define UL_VAL         0x123456789abcdef
+    jit_movi(_RDI, (jit_word_t)&data + offsetof(struct data_t, ul));
+    jit_movi(_RAX, UL_VAL);
+    jit_movr(_RSI, _RDI);
+    jit_stxai_l(8, _RDI, _RAX);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, 8);   /* expected increment: 8 */
+    jit_patch_at(jmp, fail);
+#  endif
+
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, sc));
+    jit_movr(_RDI, _RSI);      /* RDI keeps the original address for the increment check */
+    jit_ldxai_c(_RAX, _RSI, 1);
+    jmp = jit_bnei(_RAX, SC_VAL);      /* loaded value must match what the store test wrote */
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);        /* RDI = original address - advanced RSI, hence negative */
+    jmp = jit_bnei(_RDI, -1);
+    jit_patch_at(jmp, fail);
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, uc));
+    jit_movr(_RDI, _RSI);
+    jit_ldxai_uc(_RAX, _RSI, 1);
+    jmp = jit_bnei(_RAX, data.uc);     /* data.uc is read here in C and passed as the immediate operand */
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, -1);
+    jit_patch_at(jmp, fail);
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ss));
+    jit_movr(_RDI, _RSI);
+    jit_ldxai_s(_RAX, _RSI, 2);
+    jmp = jit_bnei(_RAX, SS_VAL);
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, -2);
+    jit_patch_at(jmp, fail);
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, us));
+    jit_movr(_RDI, _RSI);
+    jit_ldxai_us(_RAX, _RSI, 2);
+    jmp = jit_bnei(_RAX, data.us);
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, -2);
+    jit_patch_at(jmp, fail);
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, si));
+    jit_movr(_RDI, _RSI);
+    jit_ldxai_i(_RAX, _RSI, 4);
+    jmp = jit_bnei(_RAX, SI_VAL);
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, -4);
+    jit_patch_at(jmp, fail);
+#  if __X64 && !__X64_32
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ui));
+    jit_movr(_RDI, _RSI);
+    jit_ldxai_ui(_RAX, _RSI, 4);
+    jmp = jit_bnei(_RAX, data.ui);
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, -4);
+    jit_patch_at(jmp, fail);
+    jit_movi(_RSI, (jit_word_t)&data + offsetof(struct data_t, ul));
+    jit_movr(_RDI, _RSI);
+    jit_ldxai_l(_RAX, _RSI, 8);
+    jmp = jit_bnei(_RAX, UL_VAL);
+    jit_patch_at(jmp, fail);
+    jit_subr(_RDI, _RDI, _RSI);
+    jmp = jit_bnei(_RDI, -8);
+    jit_patch_at(jmp, fail);
+#  endif
+
+    jmp = jit_jmpi();  /* success path: jump over the abort call */
+    jit_link(fail);    /* every failed check above branches here */
+    jit_calli(abort);
+    jit_patch(jmp);
+    jit_prepare();
+    {
+       jit_pushargi((jit_word_t)"ok");
+    }
+    jit_finishi(puts);
+    jit_ret();
+    jit_epilog();
+    code = jit_emit();
+    jit_clear_state();
+
+    (*code)(); /* run the generated test */
+
+    jit_destroy_state();
+    finish_jit();
+
+#elif defined(__arm__)
+    /* make sure to test ldmia and stmia cases */
+    struct data_t {
+       float           f1;
+       float           f2;
+       double          d3;
+       double          d4;
+    } data;
+
+    init_jit(argv[0]);
+    _jit = jit_new_state();
+    jit_prolog();
+    fail = jit_forward();      /* forward label, bound by jit_link(fail) below */
+
+#define F1_VAL         1
+    jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, f1));
+    jit_movi_f(JIT_F0, F1_VAL);
+    jit_movr(JIT_R1, JIT_R0);  /* keep the original address to measure the increment */
+    jit_stxai_f(4, JIT_R0, JIT_F0);
+    jit_subr(JIT_R1, JIT_R0, JIT_R1);  /* R1 = address after the store - original address */
+    jmp = jit_bnei(JIT_R1, 4); /* expected increment: 4 */
+    jit_patch_at(jmp, fail);
+    data.f2 = 2;
+#define D3_VAL         3
+    jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, d3));
+    jit_movi_d(JIT_F0, D3_VAL);
+    jit_movr(JIT_R1, JIT_R0);
+    jit_stxai_d(8, JIT_R0, JIT_F0);
+    jit_subr(JIT_R1, JIT_R0, JIT_R1);
+    jmp = jit_bnei(JIT_R1, 8); /* expected increment: 8 */
+    jit_patch_at(jmp, fail);
+    data.d4 = 4;
+
+    jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, f1));
+    jit_movr(JIT_R1, JIT_R0);
+    jit_ldxai_f(JIT_F0, JIT_R0, 4);
+    jmp = jit_bnei_f(JIT_F0, F1_VAL);  /* loaded value must match what the store test wrote */
+    jit_patch_at(jmp, fail);
+    jit_subr(JIT_R1, JIT_R0, JIT_R1);  /* R1 = advanced R0 - original address */
+    jmp = jit_bnei(JIT_R1, 4);
+    jit_patch_at(jmp, fail);
+
+    jit_movi(JIT_R0, (jit_word_t)&data + offsetof(struct data_t, d3));
+    jit_movr(JIT_R1, JIT_R0);
+    jit_ldxai_d(JIT_F0, JIT_R0, 8);
+    jmp = jit_bnei_d(JIT_F0, D3_VAL);
+    jit_patch_at(jmp, fail);
+    jit_subr(JIT_R1, JIT_R0, JIT_R1);
+    jmp = jit_bnei(JIT_R1, 8);
+    jit_patch_at(jmp, fail);
+
+    jmp = jit_jmpi();  /* success path: jump over the abort call */
+    jit_link(fail);    /* every failed check above branches here */
+    jit_calli(abort);
+    jit_patch(jmp);
+    jit_prepare();
+    {
+       jit_pushargi((jit_word_t)"ok");
+    }
+    jit_finishi(puts);
+    jit_ret();
+    jit_epilog();
+    code = jit_emit();
+    jit_clear_state();
+
+    (*code)(); /* run the generated test */
+
+    jit_destroy_state();
+    finish_jit();
+#else
+    puts("ok");        /* no special cases on other targets: emit the expected output directly */
+#endif
+    return (0);
+}
index 69a6caf..a181f84 100644 (file)
@@ -14,14 +14,14 @@ ok:
 #  define x80                  0x8000000000000000
 #endif
 
-#if (__mips__ && __mips_isa_rev < 6)  || __sparc__ || __hppa__ || __riscv
+#if (__mips__ && __mips_isa_rev < 6)  || __sparc__ || __hppa__ || __riscv || __sh__
 #  define wnan                 x7f
 #elif (__mips__ && __mips_isa_rev >= 6) || __arm__ || __aarch64__ || __alpha__ || __loongarch__
 #  define wnan                 0
 #else
 #  define wnan                 x80
 #endif
-#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv || __loongarch__
+#if __mips__ || __arm__ || __ppc__ || __sparc__ || __hppa__ || __aarch64__ || __s390__ || __riscv || __loongarch__ || __sh__
 #  define wpinf                        x7f
 #elif __alpha__
 /* (at least) bug compatible with gcc 4.2.3 -ieee */
@@ -49,12 +49,12 @@ T##op##r##t##r0##f0##f1##l:                         \
        b##op##r##t bT##op##r##t##r0##f0##f1##l %f0 %f1 \
        calli @abort                                    \
 bT##op##r##t##r0##f0##f1##l:                           \
-       movi##t %f1 li                                  \
+       movi##t %f0 li                                  \
        op##i##t %r0 %f0 ri                             \
        bnei T##op##i##t##r0##f0##f1##l %r0 0           \
        calli @abort                                    \
 T##op##i##t##r0##f0##f1##l:                            \
-       movi##t %f1 li                                  \
+       movi##t %f0 li                                  \
        b##op##i##t bT##op##i##t##r0##f0##f1##l %f0 ri  \
        calli @abort                                    \
 bT##op##i##t##r0##f0##f1##l:                           \
@@ -64,7 +64,7 @@ bT##op##i##t##r0##f0##f1##l:                          \
        beqi F##op##r##t##r0##f0##f1##l %r0 1           \
        calli @abort                                    \
 F##op##r##t##r0##f0##f1##l:                            \
-       movi##t %f1 li                                  \
+       movi##t %f0 li                                  \
        op##i##t %r0 %f0 ri                             \
        beqi F##op##i##t##r0##f0##f1##l %r0 1           \
        calli @abort                                    \
diff --git a/deps/lightning/check/ldstxbai.ok b/deps/lightning/check/ldstxbai.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstxbai.tst b/deps/lightning/check/ldstxbai.tst
new file mode 100644 (file)
index 0000000..f23dd68
--- /dev/null
@@ -0,0 +1,444 @@
+.data  256
+
+/*
+       #define offs(field)     (offsetof(data_t, field) - offsetof(data_t, si8))
+ */
+#define nF32           -36     // offs(nf32)
+#define nF64           -32     // offs(nf64)
+#define nSI64          -24     // offs(nsi64)
+#define nUI32          -16     // offs(nui32)
+#define nSI32          -12     // offs(nsi32)
+#define nUI16           -6     // offs(nui16)
+#define nSI16           -4     // offs(nsi16)
+#define nUI8            -2     // offs(nui8)
+#define nSI8            -1     // offs(nsi8)
+#define SI8              0     // offs(si8)
+#define UI8              1     // offs(ui8)
+#define SI16             2     // offs(si16)
+#define UI16             4     // offs(ui16)
+#define SI32             8     // offs(si32)
+#define UI32            12     // offs(ui32)
+#define SI64            16     // offs(si64)
+#define F64             24     // offs(f64)
+#define F32             32     // offs(f32)
+
+/*
+       typedef struct {
+               int32_t         _pad0;
+               float32_t       nf32;
+               float64_t       nf64;
+               int64_t         nsi64;
+               uint32_t        nui32;
+               int32_t         nsi32;
+               short           _pad1;
+               uint16_t        nui16;
+               int16_t         nsi16;
+               uint8_t         nui8;
+               int8_t          nsi8;
+               int8_t          si8;
+               uint8_t         ui8;
+               int16_t         si16;
+               uint16_t        ui16;
+               int16_t         _pad2;
+               int32_t         si32;
+               uint32_t        ui32;
+               int64_t         si64;
+               float64_t       f64;
+               float32_t       f32;
+               int32_t         _pad3;
+       } data_t;
+       data_t                  data;
+ */
+
+data:
+.size  4
+minus_thirty_six:              // nF32
+.size  4
+minus_thirty_two:              // nF64
+.size  8
+minus_twenty_four:             // nSI64
+.size  8
+minus_sixteen:                 // nUI32
+.size  4
+minus_twelve:                  // nSI32
+.size  4
+.size  2                       // pad
+minus_six:                     // nUI16
+.size  2
+minus_four:                    // nSI16
+.size  2
+minus_two:                     // nUI8
+.size  1
+minus_one:
+.size  1                       // nSI8
+zero:                          // SI8
+.size  1
+one:                           // UI8
+.size  1
+two:                           // SI16
+.size  2
+four:                          // UI16
+.size  2
+.size  2                       // pad
+eight:                         // SI32
+.size  4
+twelve:                                // UI32
+.size  4
+sixteen:                       // SI64
+.size  8
+twenty_four:                   // F64
+.size  8
+thirty_two:                    // F32
+.size  4
+thirty_six:
+.align 8
+/*
+       data_t                  buffer;
+ */
+buffer:
+.size  80
+
+ok:
+.c     "ok"
+
+.code
+       jmpi main
+
+/*
+       void reset(void) {
+               memset(data, -1, sizeof(data));
+               data.nf32  = nF32;
+               data.nf64  = nF64;
+       #if __WORDSIZE == 64
+               data.nsi64 = nSI64;
+               data.nui32 = nUI32;
+       #endif
+               data.nsi32 = nSI32;
+               data.nui16 = nUI16;
+               data.nsi16 = nSI16;
+               data.nui8  = nUI8;
+               data.nsi8  = nSI8;
+               data.si8   = SI8;
+               data.ui8   = UI8;
+               data.si16  = SI16;
+               data.ui16  = UI16;
+               data.si32  = SI32;
+       #if __WORDSIZE == 64
+               data.ui32  = UI32;
+               data.si64  = SI64;
+       #endif
+               data.f64   = F64;
+               data.f32   = F32;
+       }
+ */
+reset:
+       prolog
+       movi %v0  data
+       prepare
+               pushargr %v0
+               pushargi -1
+               pushargi 80
+       finishi @memset
+       addi %v0 %v0 4
+       movi_f %f0 nF32
+       str_f %v0 %f0
+       addi %v0 %v0 $(nF64 - nF32)
+       movi_d %f0 nF64
+       str_d %v0 %f0
+       addi %v0 %v0 $(nSI64 - nF64)    
+       movi %r0 nSI64
+#if __WORDSIZE == 64
+       str_l %v0 %r0
+#endif
+       addi %v0 %v0 $(nUI32 - nSI64)
+       movi %r0 nUI32
+#if __WORDSIZE == 64
+       str_i %v0 %r0
+#endif
+       addi %v0 %v0 $(nSI32 - nUI32)
+       movi %r0 nSI32
+       str_i %v0 %r0
+       addi %v0 %v0 $(nUI16 - nSI32)
+       movi %r0 nUI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(nSI16 - nUI16)
+       movi %r0 nSI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(nUI8 - nSI16)
+       movi %r0 nUI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(nSI8 - nUI8)
+       movi %r0 nSI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(SI8 - nSI8)
+       movi %r0 SI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(UI8 - SI8)
+       movi %r0 UI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(SI16 - UI8)
+       movi %r0 SI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(UI16 - SI16)
+       movi %r0 UI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(SI32 - UI16)
+       movi %r0 SI32
+       str_i %v0 %r0
+       addi %v0 %v0 $(UI32 - SI32)
+       movi %r0 UI32
+#if __WORDSIZE == 64
+       str_i %v0 %r0
+#endif
+       addi %v0 %v0 $(SI64 - UI32)
+       movi %r0 SI64
+#if __WORDSIZE == 64
+       str_l %v0 %r0
+#endif
+       addi %v0 %v0 $(F64 - SI64)
+       movi_d %f0 F64
+       str_d %v0 %f0
+       addi %v0 %v0 $(F32 - F64)
+       movi_f %f0 F32
+       str_f %v0 %f0
+       ret
+       epilog
+
+#if __WORDSIZE == 64
+#  define IF32(expr)                   /**/
+#  define IF64(expr)                   expr
+#else
+#  define IF32(expr)                   expr
+#  define IF64(expr)                   /**/
+#endif
+
+/*
+       union {
+               int8_t          *i8;
+               uint8_t         *u8;
+               int16_t         *i16;
+               uint16_t        *u16;
+               int32_t         *i32;
+               uint32_t        *u32;
+               int64_t         *i64;
+               float32_t       *f32;
+               float64_t       *f64;
+       } u;
+       reset();
+       u.i8 = (char *)data + offsetof(data_t, si8);
+       if (*--u.i8  != nSI8)           goto fail;
+       if (*--u.u8  != nUI8)           goto fail;
+       if (*--u.i16 != nSI16)          goto fail;
+       if (*--u.u16 != nUI16)          goto fail;
+       --u.i16;
+       if (*--u.i32 != nSI32)          goto fail;
+#if __WORDSIZE == 64
+       if (*--u.u32 != nUI32)          goto fail;
+       if (*--u.i64 != nSI64)          goto fail;
+#else
+       u.i8 -= 12;
+#endif
+       if (*--u.f64 != nF64)           goto fail;
+       if (*--u.f32 != nF32)           goto fail;
+       u.i8 = (char *)data + offsetof(data_t, si8);
+       if (*u.i8++  != SI8)            goto fail;
+       if (*u.u8++  != UI8)            goto fail;
+       if (*u.i16++ != SI16)           goto fail;
+       if (*u.u16++ != UI16)           goto fail;
+       ++u.i16;
+       if (*u.i32++ != SI32)           goto fail;
+#if __WORDSIZE == 64
+       if (*u.u32++ != UI32)           goto fail;
+       if (*u.i64++ != SI64)           goto fail;
+#else
+       u.i8 += 12;
+#endif
+       if (*u.f64++ != F64)            goto fail;
+       if (*u.f32++ != F32)            goto fail;
+       goto done;
+fail:
+       abort();
+done:
+       memset(buffer, -1, 80);
+       u.i8 = (char *)buffer + offsetof(data_t, si8);
+       *--u.i8  = nSI8;
+       *--u.u8  = nUI8;
+       *--u.i16 = nSI16;
+       *--u.u16 = nUI16;
+       --u.i16;
+       *--u.i32 = nSI32;
+#if __WORDSIZE == 64
+       *--u.u32 = nUI32;
+       *--u.i64 = nSI64;
+#else
+       u.i8 -= 12;
+#endif
+       *--u.f64 = nF64;
+       *--u.f32 = nF32;
+       u.i8 = (char *)buffer + offsetof(data_t, si8);
+       *u.i8++  = SI8;
+       *u.u8++  = UI8;
+       *u.i16++ = SI16;
+       *u.u16++ = UI16;
+       ++u.i16;
+       *u.i32++ = SI32;
+#if __WORDSIZE == 64
+       *u.u32++ = UI32;
+       *u.i64++ = SI64;
+#else
+       u.i8 += 12;
+#endif
+       *u.f64++ = F64;
+       *u.f32++ = F32;
+       if (memcmp(buffer, data, sizeof(data_t)))
+               abort();
+ */
+#define TEST(R0, F0, R1) /* R0/F0: value regs, R1: address reg */      \
+       calli reset /* presumably reinitializes data */                 \
+       movi %R1 zero /* pre-update loads (ldxbi) */                    \
+       ldxbi_c %R0 %R1 $(nSI8 - SI8)                                   \
+       bnei fail##R0##F0##R1 %R0 nSI8                                  \
+       ldxbi_uc %R0 %R1 $(nUI8 - nSI8)                                 \
+       extr_c %R0 %R0                                                  \
+       bnei fail##R0##F0##R1 %R0 nUI8                                  \
+       ldxbi_s %R0 %R1 $(nSI16 - nUI8)                                 \
+       bnei fail##R0##F0##R1 %R0 nSI16                                 \
+       ldxbi_us %R0 %R1 $(nUI16 - nSI16)                               \
+       extr_s %R0 %R0                                                  \
+       bnei fail##R0##F0##R1 %R0 nUI16                                 \
+       ldxbi_i %R0 %R1 $(nSI32 - nUI16)                                \
+       bnei fail##R0##F0##R1 %R0 nSI32                                 \
+       IF64(ldxbi_ui %R0 %R1 $(nUI32 - nSI32))                         \
+       IF64(extr_i %R0 %R0)                                            \
+       IF64(bnei fail##R0##F0##R1 %R0 nUI32)                           \
+       IF32(addi %R1 %R1 $(nUI32 - nSI32))                             \
+       IF64(ldxbi_l %R0 %R1 $(nSI64 - nUI32))                          \
+       IF64(bnei fail##R0##F0##R1 %R0 nSI64)                           \
+       IF32(addi %R1 %R1 $(nSI64 - nUI32))                             \
+       ldxbi_d %F0 %R1 $(nF64 - nSI64)                                 \
+       bnei_d fail##R0##F0##R1 %F0 nF64                                \
+       ldxbi_f %F0 %R1 $(nF32 - nF64)                                  \
+       bnei_f fail##R0##F0##R1 %F0 nF32                                \
+       movi %R1 zero /* post-update loads (ldxai) */                   \
+       ldxai_c %R0 %R1 $(UI8 - SI8)                                    \
+       bnei fail##R0##F0##R1 %R0 SI8                                   \
+       ldxai_uc %R0 %R1 $(SI16 - UI8)                                  \
+       bnei fail##R0##F0##R1 %R0 UI8                                   \
+       ldxai_s %R0 %R1 $(UI16 - SI16)                                  \
+       bnei fail##R0##F0##R1 %R0 SI16                                  \
+       ldxai_us %R0 %R1 $(SI32 - UI16)                                 \
+       bnei fail##R0##F0##R1 %R0 UI16                                  \
+       ldxai_i %R0 %R1 $(UI32 - SI32)                                  \
+       bnei fail##R0##F0##R1 %R0 SI32                                  \
+       IF64(ldxai_ui %R0 %R1 $(SI64 - UI32))                           \
+       IF64(bnei fail##R0##F0##R1 %R0 UI32)                            \
+       IF32(addi %R1 %R1 $(SI64 - UI32))                               \
+       IF64(ldxai_l %R0 %R1 $(F64 - SI64))                             \
+       IF64(bnei fail##R0##F0##R1 %R0 SI64)                            \
+       IF32(addi %R1 %R1 $(F64 - SI64))                                \
+       ldxai_d %F0 %R1 $(F32 - F64)                                    \
+       bnei_d fail##R0##F0##R1 %F0 F64                                 \
+       ldxai_f %F0 %R1 $(36 - F32)                                     \
+       bnei_f fail##R0##F0##R1 %F0 F32                                 \
+       jmpi done##R0##F0##R1                                           \
+fail##R0##F0##R1:                                                      \
+       calli @abort /* a loaded value was wrong */                     \
+done##R0##F0##R1:                                                      \
+       prepare /* memset(buffer, -1, 80) */                            \
+               pushargi buffer                                         \
+               pushargi -1                                             \
+               pushargi 80                                             \
+       finishi @memset                                                 \
+       movi %R1 buffer /* pre-update stores (stxbi) */                 \
+       addi %R1 %R1 40                                                 \
+       movi %R0 nSI8                                                   \
+       stxbi_c $(nSI8 - SI8) %R1 %R0                                   \
+       movi %R0 nUI8                                                   \
+       extr_uc %R0 %R0                                                 \
+       stxbi_c $(nUI8 - nSI8) %R1 %R0                                  \
+       movi %R0 nSI16                                                  \
+       stxbi_s $(nSI16 - nUI8) %R1 %R0                                 \
+       movi %R0 nUI16                                                  \
+       extr_us %R0 %R0                                                 \
+       stxbi_s $(nUI16 - nSI16) %R1 %R0                                \
+       movi %R0 nSI32                                                  \
+       stxbi_i $(nSI32 - nUI16) %R1 %R0                                \
+       IF64(movi %R0 nUI32)                                            \
+       IF64(stxbi_i $(nUI32 - nSI32) %R1 %R0)                          \
+       IF32(addi %R1 %R1 $(nUI32 - nSI32))                             \
+       IF64(movi %R0 nSI64)                                            \
+       IF64(stxbi_l $(nSI64 - nUI32) %R1 %R0)                          \
+       IF32(addi %R1 %R1 $(nSI64 - nUI32))                             \
+       movi_d %F0 nF64                                                 \
+       stxbi_d $(nF64 - nSI64) %R1 %F0                                 \
+       movi_f %F0 nF32                                                 \
+       stxbi_f $(nF32 - nF64) %R1 %F0                                  \
+       movi %R1 buffer /* post-update stores (stxai) */                \
+       addi %R1 %R1 40                                                 \
+       movi %R0 SI8                                                    \
+       stxai_c $(UI8 - SI8) %R1 %R0                                    \
+       movi %R0 UI8                                                    \
+       stxai_c $(SI16 - UI8) %R1 %R0                                   \
+       movi %R0 SI16                                                   \
+       stxai_s $(UI16 - SI16) %R1 %R0                                  \
+       movi %R0 UI16                                                   \
+       stxai_s $(SI32 - UI16) %R1 %R0                                  \
+       movi %R0 SI32                                                   \
+       stxai_i $(UI32 - SI32) %R1 %R0                                  \
+       IF64(movi %R0 UI32)                                             \
+       IF64(stxai_i $(SI64 - UI32) %R1 %R0)                            \
+       IF32(addi %R1 %R1 $(SI64 - UI32))                               \
+       IF64(movi %R0 SI64)                                             \
+       IF64(stxai_l $(F64 - SI64) %R1 %R0)                             \
+       IF32(addi %R1 %R1 $(F64 - SI64))                                \
+       movi_d %F0 F64                                                  \
+       stxai_d $(F32 - F64) %R1 %F0                                    \
+       movi_f %F0 F32                                                  \
+       stxai_f $(36 - F32) %R1 %F0                                     \
+       prepare /* memcmp(data, buffer, 80) */                          \
+               pushargi data                                           \
+               pushargi buffer                                         \
+               pushargi 80                                             \
+       finishi @memcmp                                                 \
+       retval %R0                                                      \
+       beqi done2##R0##F0##R1 %R0 0                                    \
+       calli @abort /* buffer differs from data */                     \
+done2##R0##F0##R1:
+
+main:                          // runs TEST over 30 register combinations
+       prolog
+       TEST(r0, f0, r1)
+       TEST(r0, f0, r2)
+       TEST(r0, f0, v0)
+       TEST(r0, f0, v1)
+       TEST(r0, f0, v2)
+       TEST(r1, f1, r0)
+       TEST(r1, f1, r2)
+       TEST(r1, f1, v0)
+       TEST(r1, f1, v1)
+       TEST(r1, f1, v2)
+       TEST(r2, f2, r0)
+       TEST(r2, f2, r1)
+       TEST(r2, f2, v0)
+       TEST(r2, f2, v1)
+       TEST(r2, f2, v2)
+       TEST(v0, f3, r0)
+       TEST(v0, f3, r1)
+       TEST(v0, f3, r2)
+       TEST(v0, f3, v1)
+       TEST(v0, f3, v2)
+       TEST(v1, f4, r0)
+       TEST(v1, f4, r1)
+       TEST(v1, f4, r2)
+       TEST(v1, f4, v0)
+       TEST(v1, f4, v2)
+       TEST(v2, f5, r0)
+       TEST(v2, f5, r1)
+       TEST(v2, f5, r2)
+       TEST(v2, f5, v0)
+       TEST(v2, f5, v1)
+       prepare                 // no TEST called abort: puts(ok)
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/deps/lightning/check/ldstxbar.ok b/deps/lightning/check/ldstxbar.ok
new file mode 100644 (file)
index 0000000..9766475
--- /dev/null
@@ -0,0 +1 @@
+ok
diff --git a/deps/lightning/check/ldstxbar.tst b/deps/lightning/check/ldstxbar.tst
new file mode 100644 (file)
index 0000000..cd69bdf
--- /dev/null
@@ -0,0 +1,480 @@
+.data  256
+
+/*
+       #define offs(field)     (offsetof(data_t, field) - offsetof(data_t, si8))
+ */
+#define nF32           -36     // offs(nf32)
+#define nF64           -32     // offs(nf64)
+#define nSI64          -24     // offs(nsi64)
+#define nUI32          -16     // offs(nui32)
+#define nSI32          -12     // offs(nsi32)
+#define nUI16           -6     // offs(nui16)
+#define nSI16           -4     // offs(nsi16)
+#define nUI8            -2     // offs(nui8)
+#define nSI8            -1     // offs(nsi8)
+#define SI8              0     // offs(si8)
+#define UI8              1     // offs(ui8)
+#define SI16             2     // offs(si16)
+#define UI16             4     // offs(ui16)
+#define SI32             8     // offs(si32)
+#define UI32            12     // offs(ui32)
+#define SI64            16     // offs(si64)
+#define F64             24     // offs(f64)
+#define F32             32     // offs(f32)
+
+/*
+       typedef struct {
+               int32_t         _pad0;
+               float32_t       nf32;
+               float64_t       nf64;
+               int64_t         nsi64;
+               uint32_t        nui32;
+               int32_t         nsi32;
+               short           _pad1;
+               uint16_t        nui16;
+               int16_t         nsi16;
+               uint8_t         nui8;
+               int8_t          nsi8;
+               int8_t          si8;
+               uint8_t         ui8;
+               int16_t         si16;
+               uint16_t        ui16;
+               int16_t         _pad2;
+               int32_t         si32;
+               uint32_t        ui32;
+               int64_t         si64;
+               float64_t       f64;
+               float32_t       f32;
+               int32_t         _pad3;
+       } data_t;
+       data_t                  data;
+ */
+
+data:                          // start of data_t
+.size  4                       // _pad0
+minus_thirty_six:              // nF32
+.size  4
+minus_thirty_two:              // nF64
+.size  8
+minus_twenty_four:             // nSI64
+.size  8
+minus_sixteen:                 // nUI32
+.size  4
+minus_twelve:                  // nSI32
+.size  4
+.size  2                       // pad
+minus_six:                     // nUI16
+.size  2
+minus_four:                    // nSI16
+.size  2
+minus_two:                     // nUI8
+.size  1
+minus_one:                     // nSI8
+.size  1
+zero:                          // SI8
+.size  1
+one:                           // UI8
+.size  1
+two:                           // SI16
+.size  2
+four:                          // UI16
+.size  2
+.size  2                       // pad
+eight:                         // SI32
+.size  4
+twelve:                                // UI32
+.size  4
+sixteen:                       // SI64
+.size  8
+twenty_four:                   // F64
+.size  8
+thirty_two:                    // F32
+.size  4
+thirty_six:                    // first byte after F32
+.align 8
+/*
+       data_t                  buffer;
+ */
+buffer:
+.size  80                      // same length the memset/memcmp calls use
+
+ok:                            // string main prints on success
+.c     "ok"
+
+.code
+       jmpi main               // jump over reset to the entry point
+
+/*
+       void reset(void) {
+               memset(data, -1, sizeof(data));
+               data.nf32  = nF32;
+               data.nf64  = nF64;
+       #if __WORDSIZE == 64
+               data.nsi64 = nSI64;
+               data.nui32 = nUI32;
+       #endif
+               data.nsi32 = nSI32;
+               data.nui16 = nUI16;
+               data.nsi16 = nSI16;
+               data.nui8  = nUI8;
+               data.nsi8  = nSI8;
+               data.si8   = SI8;
+               data.ui8   = UI8;
+               data.si16  = SI16;
+               data.ui16  = UI16;
+               data.si32  = SI32;
+       #if __WORDSIZE == 64
+               data.ui32  = UI32;
+               data.si64  = SI64;
+       #endif
+               data.f64   = F64;
+               data.f32   = F32;
+       }
+ */
+reset:                         // rebuild the reference pattern in data
+       prolog
+       movi %v0  data          // %v0 = cursor, advanced field by field below
+       prepare                 // memset(data, -1, 80)
+               pushargr %v0
+               pushargi -1
+               pushargi 80
+       finishi @memset
+       addi %v0 %v0 4          // step over _pad0 to the nF32 slot
+       movi_f %f0 nF32         // each field receives its own offs() value
+       str_f %v0 %f0
+       addi %v0 %v0 $(nF64 - nF32)
+       movi_d %f0 nF64
+       str_d %v0 %f0
+       addi %v0 %v0 $(nSI64 - nF64)
+       movi %r0 nSI64
+#if __WORDSIZE == 64
+       str_l %v0 %r0           // 64-bit only; otherwise the memset fill stays
+#endif
+       addi %v0 %v0 $(nUI32 - nSI64)
+       movi %r0 nUI32
+#if __WORDSIZE == 64
+       str_i %v0 %r0           // 64-bit only; otherwise the memset fill stays
+#endif
+       addi %v0 %v0 $(nSI32 - nUI32)
+       movi %r0 nSI32
+       str_i %v0 %r0
+       addi %v0 %v0 $(nUI16 - nSI32)   // also skips the 2-byte pad
+       movi %r0 nUI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(nSI16 - nUI16)
+       movi %r0 nSI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(nUI8 - nSI16)
+       movi %r0 nUI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(nSI8 - nUI8)
+       movi %r0 nSI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(SI8 - nSI8)      // cursor now at label zero
+       movi %r0 SI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(UI8 - SI8)
+       movi %r0 UI8
+       str_c %v0 %r0
+       addi %v0 %v0 $(SI16 - UI8)
+       movi %r0 SI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(UI16 - SI16)
+       movi %r0 UI16
+       str_s %v0 %r0
+       addi %v0 %v0 $(SI32 - UI16)     // also skips the 2-byte pad
+       movi %r0 SI32
+       str_i %v0 %r0
+       addi %v0 %v0 $(UI32 - SI32)
+       movi %r0 UI32
+#if __WORDSIZE == 64
+       str_i %v0 %r0           // 64-bit only; otherwise the memset fill stays
+#endif
+       addi %v0 %v0 $(SI64 - UI32)
+       movi %r0 SI64
+#if __WORDSIZE == 64
+       str_l %v0 %r0           // 64-bit only; otherwise the memset fill stays
+#endif
+       addi %v0 %v0 $(F64 - SI64)
+       movi_d %f0 F64
+       str_d %v0 %f0
+       addi %v0 %v0 $(F32 - F64)
+       movi_f %f0 F32
+       str_f %v0 %f0
+       ret
+       epilog
+
+#if __WORDSIZE == 64                   // IF64() keeps its argument, IF32() drops it
+#  define IF32(expr)                   /**/
+#  define IF64(expr)                   expr
+#else                                  // other word sizes: the reverse
+#  define IF32(expr)                   expr
+#  define IF64(expr)                   /**/
+#endif
+
+/*
+       union {
+               int8_t          *i8;
+               uint8_t         *u8;
+               int16_t         *i16;
+               uint16_t        *u16;
+               int32_t         *i32;
+               uint32_t        *u32;
+               int64_t         *i64;
+               float32_t       *f32;
+               float64_t       *f64;
+       } u;
+       reset();
+       u.i8 = (char *)data + offsetof(data_t, si8);
+       if (*--u.i8  != nSI8)           goto fail;
+       if (*--u.u8  != nUI8)           goto fail;
+       if (*--u.i16 != nSI16)          goto fail;
+       if (*--u.u16 != nUI16)          goto fail;
+       --u.i16;
+       if (*--u.i32 != nSI32)          goto fail;
+#if __WORDSIZE == 64
+       if (*--u.u32 != nUI32)          goto fail;
+       if (*--u.i64 != nSI64)          goto fail;
+#else
+       u.i8 -= 12;
+#endif
+       if (*--u.f64 != nF64)           goto fail;
+       if (*--u.f32 != nF32)           goto fail;
+       u.i8 = (char *)data + offsetof(data_t, si8);
+       if (*u.i8++  != SI8)            goto fail;
+       if (*u.u8++  != UI8)            goto fail;
+       if (*u.i16++ != SI16)           goto fail;
+       if (*u.u16++ != UI16)           goto fail;
+       ++u.i16;
+       if (*u.i32++ != SI32)           goto fail;
+#if __WORDSIZE == 64
+       if (*u.u32++ != UI32)           goto fail;
+       if (*u.i64++ != SI64)           goto fail;
+#else
+       u.i8 += 12;
+#endif
+       if (*u.f64++ != F64)            goto fail;
+       if (*u.f32++ != F32)            goto fail;
+       goto done;
+fail:
+       abort();
+done:
+       memset(buffer, -1, 80);
+       u.i8 = (char *)buffer + offsetof(data_t, si8);
+       *--u.i8  = nSI8;
+       *--u.u8  = nUI8;
+       *--u.i16 = nSI16;
+       *--u.u16 = nUI16;
+       --u.i16;
+       *--u.i32 = nSI32;
+#if __WORDSIZE == 64
+       *--u.u32 = nUI32;
+       *--u.i64 = nSI64;
+#else
+       u.i8 -= 12;
+#endif
+       *--u.f64 = nF64;
+       *--u.f32 = nF32;
+       u.i8 = (char *)buffer + offsetof(data_t, si8);
+       *u.i8++  = SI8;
+       *u.u8++  = UI8;
+       *u.i16++ = SI16;
+       *u.u16++ = UI16;
+       ++u.i16;
+       *u.i32++ = SI32;
+#if __WORDSIZE == 64
+       *u.u32++ = UI32;
+       *u.i64++ = SI64;
+#else
+       u.i8 += 12;
+#endif
+       *u.f64++ = F64;
+       *u.f32++ = F32;
+       if (memcmp(buffer, data, sizeof(data_t)))
+               abort();
+ */
+#define TEST(R0, F0, R1, R2) /* R0/F0 value, R1 addr, R2 offset */     \
+       calli reset /* refill data with known values */                 \
+       movi %R1 zero /* pre-update loads (ldxbr) */                    \
+       movi %R2 $(nSI8 - SI8)                                          \
+       ldxbr_c %R0 %R1 %R2                                             \
+       bnei fail##R0##F0##R1 %R0 nSI8                                  \
+       movi %R2 $(nUI8 - nSI8)                                         \
+       ldxbr_uc %R0 %R1 %R2                                            \
+       extr_c %R0 %R0                                                  \
+       bnei fail##R0##F0##R1 %R0 nUI8                                  \
+       movi %R2 $(nSI16 - nUI8)                                        \
+       ldxbr_s %R0 %R1 %R2                                             \
+       bnei fail##R0##F0##R1 %R0 nSI16                                 \
+       movi %R2 $(nUI16 - nSI16)                                       \
+       ldxbr_us %R0 %R1 %R2                                            \
+       extr_s %R0 %R0                                                  \
+       bnei fail##R0##F0##R1 %R0 nUI16                                 \
+       movi %R2 $(nSI32 - nUI16)                                       \
+       ldxbr_i %R0 %R1 %R2                                             \
+       bnei fail##R0##F0##R1 %R0 nSI32                                 \
+       IF64(movi %R2 $(nUI32 - nSI32))                                 \
+       IF64(ldxbr_ui %R0 %R1 %R2)                                      \
+       IF64(extr_i %R0 %R0)                                            \
+       IF64(bnei fail##R0##F0##R1 %R0 nUI32)                           \
+       IF32(addi %R1 %R1 $(nUI32 - nSI32))                             \
+       IF64(movi %R2 $(nSI64 - nUI32))                                 \
+       IF64(ldxbr_l %R0 %R1 %R2)                                       \
+       IF64(bnei fail##R0##F0##R1 %R0 nSI64)                           \
+       IF32(addi %R1 %R1 $(nSI64 - nUI32))                             \
+       movi %R2 $(nF64 - nSI64)                                        \
+       ldxbr_d %F0 %R1 %R2                                             \
+       bnei_d fail##R0##F0##R1 %F0 nF64                                \
+       movi %R2 $(nF32 - nF64)                                         \
+       ldxbr_f %F0 %R1 %R2                                             \
+       bnei_f fail##R0##F0##R1 %F0 nF32                                \
+       movi %R1 zero /* post-update loads (ldxar) */                   \
+       movi %R2 $(UI8 - SI8)                                           \
+       ldxar_c %R0 %R1 %R2                                             \
+       bnei fail##R0##F0##R1 %R0 SI8                                   \
+       movi %R2 $(SI16 - UI8)                                          \
+       ldxar_uc %R0 %R1 %R2                                            \
+       bnei fail##R0##F0##R1 %R0 UI8                                   \
+       movi %R2 $(UI16 - SI16)                                         \
+       ldxar_s %R0 %R1 %R2                                             \
+       bnei fail##R0##F0##R1 %R0 SI16                                  \
+       movi %R2 $(SI32 - UI16)                                         \
+       ldxar_us %R0 %R1 %R2                                            \
+       bnei fail##R0##F0##R1 %R0 UI16                                  \
+       movi %R2 $(UI32 - SI32)                                         \
+       ldxar_i %R0 %R1 %R2                                             \
+       bnei fail##R0##F0##R1 %R0 SI32                                  \
+       IF64(movi %R2 $(SI64 - UI32))                                   \
+       IF64(ldxar_ui %R0 %R1 %R2)                                      \
+       IF64(bnei fail##R0##F0##R1 %R0 UI32)                            \
+       IF32(addi %R1 %R1 $(SI64 - UI32))                               \
+       IF64(movi %R2 $(F64 - SI64))                                    \
+       IF64(ldxar_l %R0 %R1 %R2)                                       \
+       IF64(bnei fail##R0##F0##R1 %R0 SI64)                            \
+       IF32(addi %R1 %R1 $(F64 - SI64))                                \
+       movi %R2 $(F32 - F64)                                           \
+       ldxar_d %F0 %R1 %R2                                             \
+       bnei_d fail##R0##F0##R1 %F0 F64                                 \
+       movi %R2 $(36 - F32)                                            \
+       ldxar_f %F0 %R1 %R2                                             \
+       bnei_f fail##R0##F0##R1 %F0 F32                                 \
+       jmpi done##R0##F0##R1                                           \
+fail##R0##F0##R1:                                                      \
+       calli @abort /* a loaded value was wrong */                     \
+done##R0##F0##R1:                                                      \
+       prepare /* memset(buffer, -1, 80) */                            \
+               pushargi buffer                                         \
+               pushargi -1                                             \
+               pushargi 80                                             \
+       finishi @memset                                                 \
+       movi %R1 buffer /* pre-update stores (stxbr) */                 \
+       addi %R1 %R1 40                                                 \
+       movi %R0 nSI8                                                   \
+       movi %R2 $(nSI8 - SI8)                                          \
+       stxbr_c %R2 %R1 %R0                                             \
+       movi %R0 nUI8                                                   \
+       extr_uc %R0 %R0                                                 \
+       movi %R2 $(nUI8 - nSI8)                                         \
+       stxbr_c %R2 %R1 %R0                                             \
+       movi %R0 nSI16                                                  \
+       movi %R2 $(nSI16 - nUI8)                                        \
+       stxbr_s %R2 %R1 %R0                                             \
+       movi %R0 nUI16                                                  \
+       extr_us %R0 %R0                                                 \
+       movi %R2 $(nUI16 - nSI16)                                       \
+       stxbr_s %R2 %R1 %R0                                             \
+       movi %R0 nSI32                                                  \
+       movi %R2 $(nSI32 - nUI16)                                       \
+       stxbr_i %R2 %R1 %R0                                             \
+       IF64(movi %R0 nUI32)                                            \
+       IF64(movi %R2 $(nUI32 - nSI32))                                 \
+       IF64(stxbr_i %R2 %R1 %R0)                                       \
+       IF32(addi %R1 %R1 $(nUI32 - nSI32))                             \
+       IF64(movi %R0 nSI64)                                            \
+       IF64(movi %R2 $(nSI64 - nUI32))                                 \
+       IF64(stxbr_l %R2 %R1 %R0)                                       \
+       IF32(addi %R1 %R1 $(nSI64 - nUI32))                             \
+       movi_d %F0 nF64                                                 \
+       movi %R2 $(nF64 - nSI64)                                        \
+       stxbr_d %R2 %R1 %F0                                             \
+       movi_f %F0 nF32                                                 \
+       movi %R2 $(nF32 - nF64)                                         \
+       stxbr_f %R2 %R1 %F0                                             \
+       movi %R1 buffer /* post-update stores (stxar) */                \
+       addi %R1 %R1 40                                                 \
+       movi %R0 SI8                                                    \
+       movi %R2 $(UI8 - SI8)                                           \
+       stxar_c %R2 %R1 %R0                                             \
+       movi %R0 UI8                                                    \
+       movi %R2 $(SI16 - UI8)                                          \
+       stxar_c %R2 %R1 %R0                                             \
+       movi %R0 SI16                                                   \
+       movi %R2 $(UI16 - SI16)                                         \
+       stxar_s %R2 %R1 %R0                                             \
+       movi %R0 UI16                                                   \
+       movi %R2 $(SI32 - UI16)                                         \
+       stxar_s %R2 %R1 %R0                                             \
+       movi %R0 SI32                                                   \
+       movi %R2 $(UI32 - SI32)                                         \
+       stxar_i %R2 %R1 %R0                                             \
+       IF64(movi %R0 UI32)                                             \
+       IF64(movi %R2 $(SI64 - UI32))                                   \
+       IF64(stxar_i %R2 %R1 %R0)                                       \
+       IF32(addi %R1 %R1 $(SI64 - UI32))                               \
+       IF64(movi %R0 SI64)                                             \
+       IF64(movi %R2 $(F64 - SI64))                                    \
+       IF64(stxar_l %R2 %R1 %R0)                                       \
+       IF32(addi %R1 %R1 $(F64 - SI64))                                \
+       movi_d %F0 F64                                                  \
+       movi %R2 $(F32 - F64)                                           \
+       stxar_d %R2 %R1 %F0                                             \
+       movi_f %F0 F32                                                  \
+       movi %R2 $(36 - F32)                                            \
+       stxar_f %R2 %R1 %F0                                             \
+       prepare /* memcmp(data, buffer, 80) */                          \
+               pushargi data                                           \
+               pushargi buffer                                         \
+               pushargi 80                                             \
+       finishi @memcmp                                                 \
+       retval %R0                                                      \
+       beqi done2##R0##F0##R1 %R0 0                                    \
+       calli @abort /* buffer differs from data */                     \
+done2##R0##F0##R1:
+
+main:                          // entry point, reached through jmpi main
+       prolog
+       TEST(r0, f0, r1, r2)
+       TEST(r0, f0, r2, v0)
+       TEST(r0, f0, v0, v1)
+       TEST(r0, f0, v1, v2)
+       TEST(r0, f0, v2, r1)
+       TEST(r1, f1, r0, r2)
+       TEST(r1, f1, r2, v0)
+       TEST(r1, f1, v0, v1)
+       TEST(r1, f1, v1, v2)
+       TEST(r1, f1, v2, r0)
+       TEST(r2, f2, r0, r1)
+       TEST(r2, f2, r1, v0)
+       TEST(r2, f2, v0, v1)
+       TEST(r2, f2, v1, v2)
+       TEST(r2, f2, v2, r0)
+       TEST(v0, f3, r0, r1)
+       TEST(v0, f3, r1, r2)
+       TEST(v0, f3, r2, v1)
+       TEST(v0, f3, v1, v2)
+       TEST(v0, f3, v2, r0)
+       TEST(v1, f4, r0, r1)
+       TEST(v1, f4, r1, r2)
+       TEST(v1, f4, r2, v0)
+       TEST(v1, f4, v0, v2)
+       TEST(v1, f4, v2, r0)
+       TEST(v2, f5, r0, r1)
+       TEST(v2, f5, r1, r2)
+       TEST(v2, f5, r2, v0)
+       TEST(v2, f5, v0, v1)
+       TEST(v2, f5, v1, r0)
+       prepare                 // no TEST called abort: puts(ok)
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
index 9bb5c5b..d485b5c 100644 (file)
@@ -398,7 +398,30 @@ static void ldxr_l(void);  static void ldxi_l(void);
 static void ldxr(void);                static void ldxi(void);
 static void unldr(void);       static void unldi(void);
 static void unldr_u(void);     static void unldi_u(void);
-static void str_c(void);       static void sti_c(void);
+static void ldxbr_c(void);     static void ldxbi_c(void);
+static void ldxar_c(void);     static void ldxai_c(void);
+static void ldxbr_uc(void);    static void ldxbi_uc(void);
+static void ldxar_uc(void);    static void ldxai_uc(void);
+static void ldxbr_s(void);     static void ldxbi_s(void);
+static void ldxar_s(void);     static void ldxai_s(void);
+static void ldxbr_us(void);    static void ldxbi_us(void);
+static void ldxar_us(void);    static void ldxai_us(void);
+static void ldxbr_i(void);     static void ldxbi_i(void);
+static void ldxar_i(void);     static void ldxai_i(void);
+#if __WORDSIZE == 64
+static void ldxbr_ui(void);    static void ldxbi_ui(void);
+static void ldxar_ui(void);    static void ldxai_ui(void);
+static void ldxbr_l(void);     static void ldxbi_l(void);
+static void ldxar_l(void);     static void ldxai_l(void);
+#endif
+static void ldxbr(void);       static void ldxbi(void);
+static void ldxar(void);       static void ldxai(void);
+static void ldxbr_f(void);     static void ldxbi_f(void);
+static void ldxar_f(void);     static void ldxai_f(void);
+static void ldxbr_d(void);     static void ldxbi_d(void);
+static void ldxar_d(void);     static void ldxai_d(void);
+static void str_c(void);
+static void sti_c(void);
 static void str_s(void);       static void sti_s(void);
 static void str_i(void);       static void sti_i(void);
 #if __WORDSIZE == 64
@@ -413,6 +436,22 @@ static void stxr_l(void);  static void stxi_l(void);
 #endif
 static void stxr(void);                static void stxi(void);
 static void unstr(void);       static void unsti(void);
+static void stxbr_c(void);     static void stxbi_c(void);
+static void stxar_c(void);     static void stxai_c(void);
+static void stxbr_s(void);     static void stxbi_s(void);
+static void stxar_s(void);     static void stxai_s(void);
+static void stxbr_i(void);     static void stxbi_i(void);
+static void stxar_i(void);     static void stxai_i(void);
+#if __WORDSIZE == 64
+static void stxbr_l(void);     static void stxbi_l(void);
+static void stxar_l(void);     static void stxai_l(void);
+#endif
+static void stxbr_f(void);     static void stxbi_f(void);
+static void stxar_f(void);     static void stxai_f(void);
+static void stxbr_d(void);     static void stxbi_d(void);
+static void stxar_d(void);     static void stxai_d(void);
+static void stxbr(void);       static void stxbi(void);
+static void stxar(void);       static void stxai(void);
 static void bltr(void);                static void blti(void);
 static void bltr_u(void);      static void blti_u(void);
 static void bler(void);                static void blei(void);
@@ -802,6 +841,28 @@ static instr_t               instr_vector[] = {
     entry(ldxr),       entry(ldxi),
     entry(unldr),      entry(unldi),
     entry(unldr_u),    entry(unldi_u),
+    entry(ldxbr_c),    entry(ldxbi_c),
+    entry(ldxar_c),    entry(ldxai_c),
+    entry(ldxbr_uc),   entry(ldxbi_uc),
+    entry(ldxar_uc),   entry(ldxai_uc),
+    entry(ldxbr_s),    entry(ldxbi_s),
+    entry(ldxar_s),    entry(ldxai_s),
+    entry(ldxbr_us),   entry(ldxbi_us),
+    entry(ldxar_us),   entry(ldxai_us),
+    entry(ldxbr_i),    entry(ldxbi_i),
+    entry(ldxar_i),    entry(ldxai_i),
+#if __WORDSIZE == 64
+    entry(ldxbr_ui),   entry(ldxbi_ui),
+    entry(ldxar_ui),   entry(ldxai_ui),
+    entry(ldxbr_l),    entry(ldxbi_l),
+    entry(ldxar_l),    entry(ldxai_l),
+#endif
+    entry(ldxbr_f),    entry(ldxbi_f),
+    entry(ldxar_f),    entry(ldxai_f),
+    entry(ldxbr_d),    entry(ldxbi_d),
+    entry(ldxar_d),    entry(ldxai_d),
+    entry(ldxbr),      entry(ldxbi),
+    entry(ldxar),      entry(ldxai),
     entry(str_c),      entry(sti_c),
     entry(str_s),      entry(sti_s),
     entry(str_i),      entry(sti_i),
@@ -817,6 +878,22 @@ static instr_t               instr_vector[] = {
 #endif
     entry(stxr),       entry(stxi),
     entry(unstr),      entry(unsti),
+    entry(stxbr_c),    entry(stxbi_c),
+    entry(stxar_c),    entry(stxai_c),
+    entry(stxbr_s),    entry(stxbi_s),
+    entry(stxar_s),    entry(stxai_s),
+    entry(stxbr_i),    entry(stxbi_i),
+    entry(stxar_i),    entry(stxai_i),
+#if __WORDSIZE == 64
+    entry(stxbr_l),    entry(stxbi_l),
+    entry(stxar_l),    entry(stxai_l),
+#endif
+    entry(stxbr_f),    entry(stxbi_f),
+    entry(stxar_f),    entry(stxai_f),
+    entry(stxbr_d),    entry(stxbi_d),
+    entry(stxar_d),    entry(stxai_d),
+    entry(stxbr),      entry(stxbi),
+    entry(stxar),      entry(stxai),
     entry(bltr),       entry(blti),
     entry(bltr_u),     entry(blti_u),
     entry(bler),       entry(blei),
@@ -1866,6 +1943,28 @@ entry_ir_ir_ir(ldxr_l)           entry_ir_ir_im(ldxi_l)
 entry_ir_ir_ir(ldxr)           entry_ir_ir_im(ldxi)
 entry_ir_ir_im(unldr)          entry_ir_im_im(unldi)
 entry_ir_ir_im(unldr_u)                entry_ir_im_im(unldi_u)
+entry_ir_ir_ir(ldxbr_c)                entry_ir_ir_im(ldxbi_c)
+entry_ir_ir_ir(ldxar_c)                entry_ir_ir_im(ldxai_c)
+entry_ir_ir_ir(ldxbr_uc)       entry_ir_ir_im(ldxbi_uc)
+entry_ir_ir_ir(ldxar_uc)       entry_ir_ir_im(ldxai_uc)
+entry_ir_ir_ir(ldxbr_s)                entry_ir_ir_im(ldxbi_s)
+entry_ir_ir_ir(ldxar_s)                entry_ir_ir_im(ldxai_s)
+entry_ir_ir_ir(ldxbr_us)       entry_ir_ir_im(ldxbi_us)
+entry_ir_ir_ir(ldxar_us)       entry_ir_ir_im(ldxai_us)
+entry_ir_ir_ir(ldxbr_i)                entry_ir_ir_im(ldxbi_i)
+entry_ir_ir_ir(ldxar_i)                entry_ir_ir_im(ldxai_i)
+#if __WORDSIZE == 64
+entry_ir_ir_ir(ldxbr_ui)       entry_ir_ir_im(ldxbi_ui)
+entry_ir_ir_ir(ldxar_ui)       entry_ir_ir_im(ldxai_ui)
+entry_ir_ir_ir(ldxbr_l)                entry_ir_ir_im(ldxbi_l)
+entry_ir_ir_ir(ldxar_l)                entry_ir_ir_im(ldxai_l)
+#endif
+entry_ir_ir_ir(ldxbr)          entry_ir_ir_im(ldxbi)
+entry_ir_ir_ir(ldxar)          entry_ir_ir_im(ldxai)
+entry_fr_ir_ir(ldxbr_f)                entry_fr_ir_im(ldxbi_f)
+entry_fr_ir_ir(ldxar_f)                entry_fr_ir_im(ldxai_f)
+entry_fr_ir_ir(ldxbr_d)                entry_fr_ir_im(ldxbi_d)
+entry_fr_ir_ir(ldxar_d)                entry_fr_ir_im(ldxai_d)
 entry_ir_ir(str_c)             entry_pm_ir(sti_c)
 entry_ir_ir(str_s)             entry_pm_ir(sti_s)
 entry_ir_ir(str_i)             entry_pm_ir(sti_i)
@@ -1881,6 +1980,22 @@ entry_ir_ir_ir(stxr_l)           entry_im_ir_ir(stxi_l)
 #endif
 entry_ir_ir_ir(stxr)           entry_im_ir_ir(stxi)
 entry_ir_ir_im(unstr)          entry_im_ir_im(unsti)
+entry_ir_ir_ir(stxbr_c)                entry_im_ir_ir(stxbi_c)
+entry_ir_ir_ir(stxar_c)                entry_im_ir_ir(stxai_c)
+entry_ir_ir_ir(stxbr_s)                entry_im_ir_ir(stxbi_s)
+entry_ir_ir_ir(stxar_s)                entry_im_ir_ir(stxai_s)
+entry_ir_ir_ir(stxbr_i)                entry_im_ir_ir(stxbi_i)
+entry_ir_ir_ir(stxar_i)                entry_im_ir_ir(stxai_i)
+#if __WORDSIZE == 64
+entry_ir_ir_ir(stxbr_l)                entry_im_ir_ir(stxbi_l)
+entry_ir_ir_ir(stxar_l)                entry_im_ir_ir(stxai_l)
+#endif
+entry_ir_ir_ir(stxbr)          entry_im_ir_ir(stxbi)
+entry_ir_ir_ir(stxar)          entry_im_ir_ir(stxai)
+entry_ir_ir_fr(stxbr_f)                entry_im_ir_fr(stxbi_f)
+entry_ir_ir_fr(stxar_f)                entry_im_ir_fr(stxai_f)
+entry_ir_ir_fr(stxbr_d)                entry_im_ir_fr(stxbi_d)
+entry_ir_ir_fr(stxar_d)                entry_im_ir_fr(stxai_d)
 entry_lb_ir_ir(bltr)           entry_lb_ir_im(blti)
 entry_lb_ir_ir(bltr_u)         entry_lb_ir_im(blti_u)
 entry_lb_ir_ir(bler)           entry_lb_ir_im(blei)
@@ -4744,6 +4859,11 @@ main(int argc, char *argv[])
     opt_short += snprintf(cmdline + opt_short,
                          sizeof(cmdline) - opt_short,
                          " -D__loongarch__=1");
+#endif
+#if defined(__sh__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__sh__=1");
 #endif
     if ((parser.fp = popen(cmdline, "r")) == NULL)
        error("cannot execute %s", cmdline);
index 76457b4..31594ad 100644 (file)
@@ -15,7 +15,7 @@ dnl License for more details.
 dnl
 
 AC_PREREQ([2.71])
-AC_INIT([GNU lightning],[2.2.2],[pcpa@gnu.org],[lightning])
+AC_INIT([GNU lightning],[2.2.3],[pcpa@gnu.org],[lightning])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CANONICAL_TARGET
 AC_CONFIG_SRCDIR([Makefile.am])
@@ -222,6 +222,7 @@ case "$target_cpu" in
     alpha*)            cpu=alpha       ;;
     riscv*)            cpu=riscv       ;;
     loongarch*)                cpu=loongarch   ;;
+    sh*)               cpu=sh          ;;
     *)                                 ;;
 esac
 AM_CONDITIONAL(cpu_arm,       [test cpu-$cpu = cpu-arm])
@@ -236,6 +237,7 @@ AM_CONDITIONAL(cpu_s390,      [test cpu-$cpu = cpu-s390])
 AM_CONDITIONAL(cpu_alpha,     [test cpu-$cpu = cpu-alpha])
 AM_CONDITIONAL(cpu_riscv,     [test cpu-$cpu = cpu-riscv])
 AM_CONDITIONAL(cpu_loongarch, [test cpu-$cpu = cpu-loongarch])
+AM_CONDITIONAL(cpu_sh,      [test cpu-$cpu = cpu-sh])
 
 # Test x87 if both, x87 and sse2 available
 ac_cv_test_x86_x87=
index f71b77c..59b3f4d 100644 (file)
@@ -554,6 +554,10 @@ ldr     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
 ldi     _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2
 ldxr    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
 ldxi    _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *(O2+O3)
+ldxbr   _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O2 += O3, O1 = *O2
+ldxbi   _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O2 += O3, O1 = *O2
+ldxar   _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2, O2 += O3
+ldxai   _c  _uc  _s  _us  _i  _ui  _l  _f  _d  O1 = *O2, O2 += O3
 @end example
 
 @item Store operations
@@ -565,6 +569,10 @@ str     _c       _s       _i       _l  _f  _d  *O1 = O2
 sti     _c       _s       _i       _l  _f  _d  *O1 = O2
 stxr    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
 stxi    _c       _s       _i       _l  _f  _d  *(O1+O2) = O3
+stxbr   _c       _s       _i       _l  _f  _d  O2 += O1, *O2 = O3
+stxbi   _c       _s       _i       _l  _f  _d  O2 += O1, *O2 = O3
+stxar   _c       _s       _i       _l  _f  _d  *O2 = O3, O2 += O1
+stxai   _c       _s       _i       _l  _f  _d  *O2 = O3, O2 += O1
 @end example
 Note that the unsigned type modifier is not available, as the store
 only writes to the 1, 2, 4 or 8 sized memory address.
diff --git a/deps/lightning/gnulib b/deps/lightning/gnulib
new file mode 160000 (submodule)
index 0000000..e54b645
--- /dev/null
@@ -0,0 +1 @@
+Subproject commit e54b645fc6b8422562327443bda575c65d931fbd
index 4ab4a0a..8fb270a 100644 (file)
@@ -154,6 +154,8 @@ typedef jit_int32_t         jit_fpr_t;
 #  include <lightning/jit_riscv.h>
 #elif defined(__loongarch__)
 #  include <lightning/jit_loongarch.h>
+#elif defined(__sh__)
+#  include <lightning/jit_sh.h>
 #endif
 
 #define jit_flag_node          0x0001  /* patch node not absolute */
@@ -1211,6 +1213,119 @@ typedef enum {
 #define jit_hmuli_u(u,v,w)     jit_new_node_www(jit_code_hmuli_u,u,v,w)
     jit_code_hmulr_u,          jit_code_hmuli_u,
 
+#define jit_ldxbr_c(u,v,w)     jit_new_node_www(jit_code_ldxbr_c,u,v,w)
+#define jit_ldxbi_c(u,v,w)     jit_new_node_www(jit_code_ldxbi_c,u,v,w)
+    jit_code_ldxbr_c,          jit_code_ldxbi_c,
+#define jit_ldxar_c(u,v,w)     jit_new_node_www(jit_code_ldxar_c,u,v,w)
+#define jit_ldxai_c(u,v,w)     jit_new_node_www(jit_code_ldxai_c,u,v,w)
+    jit_code_ldxar_c,          jit_code_ldxai_c,
+#define jit_ldxbr_uc(u,v,w)    jit_new_node_www(jit_code_ldxbr_uc,u,v,w)
+#define jit_ldxbi_uc(u,v,w)    jit_new_node_www(jit_code_ldxbi_uc,u,v,w)
+    jit_code_ldxbr_uc,         jit_code_ldxbi_uc,
+#define jit_ldxar_uc(u,v,w)    jit_new_node_www(jit_code_ldxar_uc,u,v,w)
+#define jit_ldxai_uc(u,v,w)    jit_new_node_www(jit_code_ldxai_uc,u,v,w)
+    jit_code_ldxar_uc,         jit_code_ldxai_uc,
+#define jit_ldxbr_s(u,v,w)     jit_new_node_www(jit_code_ldxbr_s,u,v,w)
+#define jit_ldxbi_s(u,v,w)     jit_new_node_www(jit_code_ldxbi_s,u,v,w)
+    jit_code_ldxbr_s,          jit_code_ldxbi_s,
+#define jit_ldxar_s(u,v,w)     jit_new_node_www(jit_code_ldxar_s,u,v,w)
+#define jit_ldxai_s(u,v,w)     jit_new_node_www(jit_code_ldxai_s,u,v,w)
+    jit_code_ldxar_s,          jit_code_ldxai_s,
+#define jit_ldxbr_us(u,v,w)    jit_new_node_www(jit_code_ldxbr_us,u,v,w)
+#define jit_ldxbi_us(u,v,w)    jit_new_node_www(jit_code_ldxbi_us,u,v,w)
+    jit_code_ldxbr_us,         jit_code_ldxbi_us,
+#define jit_ldxar_us(u,v,w)    jit_new_node_www(jit_code_ldxar_us,u,v,w)
+#define jit_ldxai_us(u,v,w)    jit_new_node_www(jit_code_ldxai_us,u,v,w)
+    jit_code_ldxar_us,         jit_code_ldxai_us,
+#define jit_ldxbr_i(u,v,w)     jit_new_node_www(jit_code_ldxbr_i,u,v,w)
+#define jit_ldxbi_i(u,v,w)     jit_new_node_www(jit_code_ldxbi_i,u,v,w)
+    jit_code_ldxbr_i,          jit_code_ldxbi_i,
+#define jit_ldxar_i(u,v,w)     jit_new_node_www(jit_code_ldxar_i,u,v,w)
+#define jit_ldxai_i(u,v,w)     jit_new_node_www(jit_code_ldxai_i,u,v,w)
+    jit_code_ldxar_i,          jit_code_ldxai_i,
+#if __WORDSIZE == 32
+#  define jit_ldxbr(u,v,w)     jit_ldxbr_i(u,v,w)
+#  define jit_ldxbi(u,v,w)     jit_ldxbi_i(u,v,w)
+#  define jit_ldxar(u,v,w)     jit_ldxar_i(u,v,w)
+#  define jit_ldxai(u,v,w)     jit_ldxai_i(u,v,w)
+#else
+#  define jit_ldxbr(u,v,w)     jit_ldxbr_l(u,v,w)
+#  define jit_ldxbi(u,v,w)     jit_ldxbi_l(u,v,w)
+#  define jit_ldxar(u,v,w)     jit_ldxar_l(u,v,w)
+#  define jit_ldxai(u,v,w)     jit_ldxai_l(u,v,w)
+#  define jit_ldxbr_ui(u,v,w)  jit_new_node_www(jit_code_ldxbr_ui,u,v,w)
+#  define jit_ldxbi_ui(u,v,w)  jit_new_node_www(jit_code_ldxbi_ui,u,v,w)
+#  define jit_ldxar_ui(u,v,w)  jit_new_node_www(jit_code_ldxar_ui,u,v,w)
+#  define jit_ldxai_ui(u,v,w)  jit_new_node_www(jit_code_ldxai_ui,u,v,w)
+#  define jit_ldxbr_l(u,v,w)   jit_new_node_www(jit_code_ldxbr_l,u,v,w)
+#  define jit_ldxbi_l(u,v,w)   jit_new_node_www(jit_code_ldxbi_l,u,v,w)
+#  define jit_ldxar_l(u,v,w)   jit_new_node_www(jit_code_ldxar_l,u,v,w)
+#  define jit_ldxai_l(u,v,w)   jit_new_node_www(jit_code_ldxai_l,u,v,w)
+#endif
+    jit_code_ldxbr_ui,         jit_code_ldxbi_ui,
+    jit_code_ldxar_ui,         jit_code_ldxai_ui,
+    jit_code_ldxbr_l,          jit_code_ldxbi_l,
+    jit_code_ldxar_l,          jit_code_ldxai_l,
+#  define jit_ldxbr_f(u,v,w)   jit_new_node_www(jit_code_ldxbr_f,u,v,w)
+#  define jit_ldxbi_f(u,v,w)   jit_new_node_www(jit_code_ldxbi_f,u,v,w)
+#  define jit_ldxar_f(u,v,w)   jit_new_node_www(jit_code_ldxar_f,u,v,w)
+#  define jit_ldxai_f(u,v,w)   jit_new_node_www(jit_code_ldxai_f,u,v,w)
+    jit_code_ldxbr_f,          jit_code_ldxbi_f,
+    jit_code_ldxar_f,          jit_code_ldxai_f,
+#  define jit_ldxbr_d(u,v,w)   jit_new_node_www(jit_code_ldxbr_d,u,v,w)
+#  define jit_ldxbi_d(u,v,w)   jit_new_node_www(jit_code_ldxbi_d,u,v,w)
+#  define jit_ldxar_d(u,v,w)   jit_new_node_www(jit_code_ldxar_d,u,v,w)
+#  define jit_ldxai_d(u,v,w)   jit_new_node_www(jit_code_ldxai_d,u,v,w)
+    jit_code_ldxbr_d,          jit_code_ldxbi_d,
+    jit_code_ldxar_d,          jit_code_ldxai_d,
+#define jit_stxbr_c(u,v,w)     jit_new_node_www(jit_code_stxbr_c,u,v,w)
+#define jit_stxbi_c(u,v,w)     jit_new_node_www(jit_code_stxbi_c,u,v,w)
+#define jit_stxar_c(u,v,w)     jit_new_node_www(jit_code_stxar_c,u,v,w)
+#define jit_stxai_c(u,v,w)     jit_new_node_www(jit_code_stxai_c,u,v,w)
+    jit_code_stxbr_c,          jit_code_stxbi_c,
+    jit_code_stxar_c,          jit_code_stxai_c,
+#define jit_stxbr_s(u,v,w)     jit_new_node_www(jit_code_stxbr_s,u,v,w)
+#define jit_stxbi_s(u,v,w)     jit_new_node_www(jit_code_stxbi_s,u,v,w)
+#define jit_stxar_s(u,v,w)     jit_new_node_www(jit_code_stxar_s,u,v,w)
+#define jit_stxai_s(u,v,w)     jit_new_node_www(jit_code_stxai_s,u,v,w)
+    jit_code_stxbr_s,          jit_code_stxbi_s,
+    jit_code_stxar_s,          jit_code_stxai_s,
+#define jit_stxbr_i(u,v,w)     jit_new_node_www(jit_code_stxbr_i,u,v,w)
+#define jit_stxbi_i(u,v,w)     jit_new_node_www(jit_code_stxbi_i,u,v,w)
+#define jit_stxar_i(u,v,w)     jit_new_node_www(jit_code_stxar_i,u,v,w)
+#define jit_stxai_i(u,v,w)     jit_new_node_www(jit_code_stxai_i,u,v,w)
+    jit_code_stxbr_i,          jit_code_stxbi_i,
+    jit_code_stxar_i,          jit_code_stxai_i,
+#if __WORDSIZE == 32
+#  define jit_stxbr(u,v,w)     jit_stxbr_i(u,v,w)
+#  define jit_stxbi(u,v,w)     jit_stxbi_i(u,v,w)
+#  define jit_stxar(u,v,w)     jit_stxar_i(u,v,w)
+#  define jit_stxai(u,v,w)     jit_stxai_i(u,v,w)
+#else
+#  define jit_stxbr(u,v,w)     jit_stxbr_l(u,v,w)
+#  define jit_stxbi(u,v,w)     jit_stxbi_l(u,v,w)
+#  define jit_stxar(u,v,w)     jit_stxar_l(u,v,w)
+#  define jit_stxai(u,v,w)     jit_stxai_l(u,v,w)
+#  define jit_stxbr_l(u,v,w)   jit_new_node_www(jit_code_stxbr_l,u,v,w)
+#  define jit_stxbi_l(u,v,w)   jit_new_node_www(jit_code_stxbi_l,u,v,w)
+#  define jit_stxar_l(u,v,w)   jit_new_node_www(jit_code_stxar_l,u,v,w)
+#  define jit_stxai_l(u,v,w)   jit_new_node_www(jit_code_stxai_l,u,v,w)
+#endif
+    jit_code_stxbr_l,          jit_code_stxbi_l,
+    jit_code_stxar_l,          jit_code_stxai_l,
+#  define jit_stxbr_f(u,v,w)   jit_new_node_www(jit_code_stxbr_f,u,v,w)
+#  define jit_stxbi_f(u,v,w)   jit_new_node_www(jit_code_stxbi_f,u,v,w)
+#  define jit_stxar_f(u,v,w)   jit_new_node_www(jit_code_stxar_f,u,v,w)
+#  define jit_stxai_f(u,v,w)   jit_new_node_www(jit_code_stxai_f,u,v,w)
+    jit_code_stxbr_f,          jit_code_stxbi_f,
+    jit_code_stxar_f,          jit_code_stxai_f,
+#  define jit_stxbr_d(u,v,w)   jit_new_node_www(jit_code_stxbr_d,u,v,w)
+#  define jit_stxbi_d(u,v,w)   jit_new_node_www(jit_code_stxbi_d,u,v,w)
+#  define jit_stxar_d(u,v,w)   jit_new_node_www(jit_code_stxar_d,u,v,w)
+#  define jit_stxai_d(u,v,w)   jit_new_node_www(jit_code_stxai_d,u,v,w)
+    jit_code_stxbr_d,          jit_code_stxbi_d,
+    jit_code_stxar_d,          jit_code_stxai_d,
+
     jit_code_last_code
 } jit_code_t;
 
index e21bbaa..6d3944a 100644 (file)
@@ -67,3 +67,7 @@ if cpu_loongarch
 lightning_include_HEADERS =    \
        jit_loongarch.h
 endif
+if cpu_sh
+lightning_include_HEADERS =    \
+       jit_sh.h
+endif
index 558f553..b42ae07 100644 (file)
@@ -29,6 +29,7 @@
 #define jit_swf_p()            (jit_cpu.vfp == 0)
 #define jit_hardfp_p()         jit_cpu.abi
 #define jit_ldrt_strt_p()      jit_cpu.ldrt_strt
+#define jit_post_index_p()     jit_cpu.post_index
 
 #define JIT_FP                 _R11
 typedef enum {
@@ -125,6 +126,13 @@ typedef struct {
      * is in arm mode, or the reverse, what may cause a crash upon return
      * of that function if generating jit for a relative jump.
      */
+    /* Apparently a bug in qemu 8.1.3 (and possibly other versions): it
+     * treats ldrT Rt, [Rn, #+/-<immN>]! and ldrT Rt, [Rn], #+/-<immN>
+     * identically, as a pre-index, but the second one should adjust
+     * Rn after the load.
+     * The syntax for only offsetting is ldrT Rt, [Rn{, #+/-<immN>}]
+     */
+    jit_uint32_t post_index    : 1;
     jit_uint32_t exchange      : 1;
     /* By default assume cannot load unaligned data.
      * A3.2.1
index 460c491..e9274b6 100644 (file)
@@ -82,4 +82,13 @@ typedef enum {
 #define JIT_NOREG              _NOREG
 } jit_reg_t;
 
+typedef struct {	/* PowerPC cpu feature flags, exposed through the global jit_cpu */
+    jit_uint32_t popcntb       : 1;	/* NOTE(review): presumably set when the popcntb instruction is usable - confirm where jit_cpu is initialized */
+} jit_cpu_t;
+
+/*
+ * Initialization
+ */
+extern jit_cpu_t               jit_cpu;
+
 #endif /* _jit_ppc_h */
index a730d73..d350bab 100644 (file)
@@ -177,6 +177,13 @@ typedef jit_uint64_t               jit_regset_t;
 #  define JIT_RET              _A0
 #  define JIT_FRET             _FA0
 typedef jit_uint64_t           jit_regset_t;
+#elif defined(__sh__)
+#  define JIT_RA0              _R4
+#  define JIT_FA0              _XF4
+#  define JIT_SP               _R15
+#  define JIT_RET              _R0
+#  define JIT_FRET             _XF0
+typedef jit_uint32_t           jit_regset_t;
 #endif
 
 #define jit_data(u,v,w)                _jit_data(_jit,u,v,w)
@@ -350,17 +357,19 @@ extern jit_node_t *_jit_data(jit_state_t*, const void*,
 #define jit_cc_a0_cnd          0x00000100      /* arg1 is a conditinally set register */
 #define jit_cc_a1_reg          0x00000200      /* arg1 is a register */
 #define jit_cc_a1_chg          0x00000400      /* arg1 is modified */
-#define jit_cc_a1_int          0x00000800      /* arg1 is immediate word */
-#define jit_cc_a1_flt          0x00001000      /* arg1 is immediate float */
-#define jit_cc_a1_dbl          0x00002000      /* arg1 is immediate double */
-#define jit_cc_a1_arg          0x00004000      /* arg1 is an argument node */
-#define jit_cc_a1_rlh          0x00008000      /* arg1 is a register pair */
-#define jit_cc_a2_reg          0x00010000      /* arg2 is a register */
-#define jit_cc_a2_chg          0x00020000      /* arg2 is modified */
-#define jit_cc_a2_int          0x00100000      /* arg2 is immediate word */
-#define jit_cc_a2_flt          0x00200000      /* arg2 is immediate float */
-#define jit_cc_a2_dbl          0x00400000      /* arg2 is immediate double */
-#define jit_cc_a2_rlh          0x00800000      /* arg2 is a register pair */
+#define jit_cc_a1_dep          0x00000800      /* arg1 is incremented
+                                                * cannot set jit_cc_a1_chg */
+#define jit_cc_a1_int          0x00001000      /* arg1 is immediate word */
+#define jit_cc_a1_flt          0x00002000      /* arg1 is immediate float */
+#define jit_cc_a1_dbl          0x00004000      /* arg1 is immediate double */
+#define jit_cc_a1_arg          0x00008000      /* arg1 is an argument node */
+#define jit_cc_a1_rlh          0x00010000      /* arg1 is a register pair */
+#define jit_cc_a2_reg          0x00020000      /* arg2 is a register */
+#define jit_cc_a2_chg          0x00040000      /* arg2 is modified */
+#define jit_cc_a2_int          0x00080000      /* arg2 is immediate word */
+#define jit_cc_a2_flt          0x00100000      /* arg2 is immediate float */
+#define jit_cc_a2_dbl          0x00200000      /* arg2 is immediate double */
+#define jit_cc_a2_rlh          0x00400000      /* arg2 is a register pair */
 
 #if __ia64__ || (__sparc__ && __WORDSIZE == 64)
 extern void
@@ -445,7 +454,7 @@ typedef struct jit_value    jit_value_t;
 typedef struct jit_compiler    jit_compiler_t;
 typedef struct jit_function    jit_function_t;
 typedef struct jit_register    jit_register_t;
-#if __arm__
+#if __arm__ || __sh__
 #  if DISASSEMBLER
 typedef struct jit_data_info   jit_data_info_t;
 #  endif
@@ -520,7 +529,7 @@ typedef struct {
     jit_node_t         *node;
 } jit_patch_t;
 
-#if __arm__ && DISASSEMBLER
+#if (__arm__ || __sh__) && DISASSEMBLER
 struct jit_data_info {
     jit_uword_t                  code;         /* pointer in code buffer */
     jit_word_t           length;       /* length of constant vector */
@@ -746,6 +755,25 @@ struct jit_compiler {
            jit_word_t    length;       /* length of instrs/values vector */
        } vector;
     } consts;
+#elif defined(__sh__)
+#  if DISASSEMBLER
+    struct {
+       jit_data_info_t  *ptr;
+       jit_word_t        offset;
+       jit_word_t        length;
+    } data_info;                       /* constant pools information */
+#  endif
+    jit_bool_t mode_d;
+    jit_bool_t no_flag;
+    jit_bool_t uses_fpu;
+    struct {
+       jit_uint8_t      *data;         /* pointer to code */
+       jit_word_t        size;         /* size data */
+       jit_word_t        offset;       /* pending patches */
+       jit_word_t        length;       /* number of pending constants */
+       jit_int32_t       values[1024]; /* pending constants */
+       jit_word_t        patches[2048];
+    } consts;
 #endif
 #if GET_JIT_SIZE
     /* Temporary storage to calculate instructions length */
diff --git a/deps/lightning/include/lightning/jit_sh.h b/deps/lightning/include/lightning/jit_sh.h
new file mode 100644 (file)
index 0000000..25ba582
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2020  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paul Cercueil
+ */
+
+#ifndef _jit_sh_h
+#define _jit_sh_h
+
+#define JIT_HASH_CONSTS                0
+#define JIT_NUM_OPERANDS       2
+
+typedef enum {	/* SH register numbering; each JIT_* alias is defined next to the enumerator it names */
+#define jit_r(i)               (JIT_R0 + (i))	/* i-th JIT_R register, counting up from JIT_R0 */
+#define jit_r_num()            3	/* JIT_R0 .. JIT_R2 */
+#define jit_v(i)               (JIT_V0 + (i))	/* i-th JIT_V register, counting up from JIT_V0 */
+#define jit_v_num()            6	/* JIT_V0 .. JIT_V5 */
+#define jit_f(i)               (JIT_F0 - (i) * 2)	/* steps down two enumerators per index: jit_f(0) = _F14 ... jit_f(7) = _F0 */
+#ifdef __SH_FPU_ANY__	/* float registers are only offered when the compiler defines this macro */
+#    define jit_f_num()                8
+#else
+#    define jit_f_num()                0
+#endif
+       _R0,	/* not given a JIT_R or JIT_V alias here; it is JIT_RET in jit_private.h */
+
+       /* caller-saved temporary registers */
+#define JIT_R0                 _R1
+#define JIT_R1                 _R2
+#define JIT_R2                 _R3
+       _R1,    _R2,    _R3,
+
+       /* argument registers; the first one, _R4, is JIT_RA0 in jit_private.h */
+       _R4,    _R5,    _R6,    _R7,
+
+       /* callee-saved registers */
+#define JIT_V0                 _R8
+#define JIT_V1                 _R9
+#define JIT_V2                 _R10
+#define JIT_V3                 _R11
+#define JIT_V4                 _R12
+#define JIT_V5                 _R13
+       _R8,    _R9,    _R10,   _R11,   _R12,   _R13,
+
+#define JIT_FP                 _R14
+       _R14,
+       _R15,	/* JIT_SP in jit_private.h */
+
+       _GBR,
+
+       /* floating-point registers; only the even-numbered ones get a JIT_F alias */
+#define JIT_F0                 _F14
+#define JIT_F1                 _F12
+#define JIT_F2                 _F10
+#define JIT_F3                 _F8
+#define JIT_F4                 _F6
+#define JIT_F5                 _F4
+#define JIT_F6                 _F2
+#define JIT_F7                 _F0
+       _F0,    _F1,    _F2,    _F3,    _F4,    _F5,    _F6,    _F7,
+       _F8,    _F9,    _F10,   _F11,   _F12,   _F13,   _F14,   _F15,
+
+       /* Banked floating-point registers; _XF4 is JIT_FA0 and _XF0 is JIT_FRET in jit_private.h */
+       _XF0,   _XF1,   _XF2,   _XF3,   _XF4,   _XF5,   _XF6,   _XF7,
+       _XF8,   _XF9,   _XF10,  _XF11,  _XF12,  _XF13,  _XF14,  _XF15,
+
+#define JIT_NOREG              _NOREG
+       _NOREG,	/* placed after every real register */
+} jit_reg_t;
+
+#endif /* _jit_sh_h */
index 04b5f92..2343709 100644 (file)
@@ -17,7 +17,7 @@
 AM_CFLAGS = -I$(top_builddir)/include -I$(top_srcdir)/include  \
        -D_GNU_SOURCE $(LIGHTNING_CFLAGS)
 liblightning_LTLIBRARIES = liblightning.la
-liblightning_la_LDFLAGS = -version-info 2:1:0
+liblightning_la_LDFLAGS = -version-info 2:2:0
 
 AM_CPPFLAGS =
 if get_jit_size
@@ -83,6 +83,9 @@ EXTRA_DIST =                  \
        jit_s390-cpu.c          \
        jit_s390-fpu.c          \
        jit_s390-sz.c           \
+       jit_sh.c                \
+       jit_sh-cpu.c            \
+       jit_sh-sz.c             \
        jit_sparc.c             \
        jit_sparc-cpu.c         \
        jit_sparc-fpu.c         \
index b0bc26f..6a5805f 100644 (file)
@@ -210,6 +210,8 @@ typedef union {
     jit_int32_t                w;
 #  undef ui
 } instr_t;
+#  define s9_p(d)                      ((d) >= -256 && (d) <= 255)
+#  define u12_p(d)                     ((d) >= 0 && (d) <= 4095)
 #  define s26_p(d)                     ((d) >= -33554432 && (d) <= 33554431)
 #  define ii(i)                                *_jit->pc.ui++ = i
 #  define ldr(r0,r1)                   ldr_l(r0,r1)
@@ -322,18 +324,40 @@ typedef union {
 #  define A64_LDRWI                    0xb9400000
 #  define A64_LDRSWI                   0xb9800000
 #  define A64_STRB                     0x38206800
+#  define A64_STRB_B                   0x38000c00
+#  define A64_STRB_A                   0x38000400
 #  define A64_LDRB                     0x38606800
-#  define A64_LDRSB                    0x38e06800
+#  define A64_LDRB_B                   0x38400c00
+#  define A64_LDRB_A                   0x38400400
+#  define A64_LDRSB                    0x38a06800
+#  define A64_LDRSB_B                  0x38800c00
+#  define A64_LDRSB_A                  0x38800400
 #  define A64_STR                      0xf8206800
+#  define A64_STR_B                    0xf8000c00
+#  define A64_STR_A                    0xf8000400
 #  define A64_LDR                      0xf8606800
+#  define A64_LDR_B                    0xf8400c00
+#  define A64_LDR_A                    0xf8400400
 #  define A64_LDAXR                    0xc85ffc00
 #  define A64_STLXR                    0xc800fc00
 #  define A64_STRH                     0x78206800
+#  define A64_STRH_B                   0x78000c00
+#  define A64_STRH_A                   0x78000400
 #  define A64_LDRH                     0x78606800
+#  define A64_LDRH_B                   0x78400c00
+#  define A64_LDRH_A                   0x78400400
 #  define A64_LDRSH                    0x78a06800
+#  define A64_LDRSH_B                  0x78800c00
+#  define A64_LDRSH_A                  0x78800400
 #  define A64_STRW                     0xb8206800
+#  define A64_STRW_B                   0xb8000c00
+#  define A64_STRW_A                   0xb8000400
 #  define A64_LDRW                     0xb8606800
+#  define A64_LDRW_B                   0xb8400c00
+#  define A64_LDRW_A                   0xb8400400
 #  define A64_LDRSW                    0xb8a06800
+#  define A64_LDRSW_B                  0xb8800c00
+#  define A64_LDRSW_A                  0xb8800400
 #  define A64_STURB                    0x38000000
 #  define A64_LDURB                    0x38400000
 #  define A64_LDURSB                   0x38800000
@@ -448,38 +472,60 @@ typedef union {
 #  define LDRSB(Rt,Rn,Rm)              oxxx(A64_LDRSB,Rt,Rn,Rm)
 #  define LDRSBI(Rt,Rn,Imm12)          oxxi(A64_LDRSBI,Rt,Rn,Imm12)
 #  define LDURSB(Rt,Rn,Imm9)           oxx9(A64_LDURSB,Rt,Rn,Imm9)
+#  define LDRSB_B(Rt,Rn,Imm9)          oxxs9(A64_LDRSB_B,Rt,Rn,Imm9)
+#  define LDRSB_A(Rt,Rn,Imm9)          oxxs9(A64_LDRSB_A,Rt,Rn,Imm9)
 #  define LDRB(Rt,Rn,Rm)               oxxx(A64_LDRB,Rt,Rn,Rm)
 #  define LDRBI(Rt,Rn,Imm12)           oxxi(A64_LDRBI,Rt,Rn,Imm12)
 #  define LDURB(Rt,Rn,Imm9)            oxx9(A64_LDURB,Rt,Rn,Imm9)
+#  define LDRB_B(Rt,Rn,Imm9)           oxxs9(A64_LDRB_B,Rt,Rn,Imm9)
+#  define LDRB_A(Rt,Rn,Imm9)           oxxs9(A64_LDRB_A,Rt,Rn,Imm9)
 #  define LDRSH(Rt,Rn,Rm)              oxxx(A64_LDRSH,Rt,Rn,Rm)
 #  define LDRSHI(Rt,Rn,Imm12)          oxxi(A64_LDRSHI,Rt,Rn,Imm12)
 #  define LDURSH(Rt,Rn,Imm9)           oxx9(A64_LDURSH,Rt,Rn,Imm9)
+#  define LDRSH_B(Rt,Rn,Imm9)          oxxs9(A64_LDRSH_B,Rt,Rn,Imm9)
+#  define LDRSH_A(Rt,Rn,Imm9)          oxxs9(A64_LDRSH_A,Rt,Rn,Imm9)
 #  define LDRH(Rt,Rn,Rm)               oxxx(A64_LDRH,Rt,Rn,Rm)
 #  define LDRHI(Rt,Rn,Imm12)           oxxi(A64_LDRHI,Rt,Rn,Imm12)
 #  define LDURH(Rt,Rn,Imm9)            oxx9(A64_LDURH,Rt,Rn,Imm9)
+#  define LDRH_B(Rt,Rn,Imm9)           oxxs9(A64_LDRH_B,Rt,Rn,Imm9)
+#  define LDRH_A(Rt,Rn,Imm9)           oxxs9(A64_LDRH_A,Rt,Rn,Imm9)
 #  define LDRSW(Rt,Rn,Rm)              oxxx(A64_LDRSW,Rt,Rn,Rm)
 #  define LDRSWI(Rt,Rn,Imm12)          oxxi(A64_LDRSWI,Rt,Rn,Imm12)
 #  define LDURSW(Rt,Rn,Imm9)           oxx9(A64_LDURSW,Rt,Rn,Imm9)
+#  define LDRSW_B(Rt,Rn,Imm9)          oxxs9(A64_LDRSW_B,Rt,Rn,Imm9)
+#  define LDRSW_A(Rt,Rn,Imm9)          oxxs9(A64_LDRSW_A,Rt,Rn,Imm9)
 #  define LDRW(Rt,Rn,Rm)               oxxx(A64_LDRW,Rt,Rn,Rm)
 #  define LDRWI(Rt,Rn,Imm12)           oxxi(A64_LDRWI,Rt,Rn,Imm12)
 #  define LDURW(Rt,Rn,Imm9)            oxx9(A64_LDURW,Rt,Rn,Imm9)
+#  define LDRW_B(Rt,Rn,Imm9)           oxxs9(A64_LDRW_B,Rt,Rn,Imm9)
+#  define LDRW_A(Rt,Rn,Imm9)           oxxs9(A64_LDRW_A,Rt,Rn,Imm9)
 #  define LDR(Rt,Rn,Rm)                        oxxx(A64_LDR,Rt,Rn,Rm)
 #  define LDRI(Rt,Rn,Imm12)            oxxi(A64_LDRI,Rt,Rn,Imm12)
 #  define LDUR(Rt,Rn,Imm9)             oxx9(A64_LDUR,Rt,Rn,Imm9)
+#  define LDR_B(Rt,Rn,Imm9)            oxxs9(A64_LDR_B,Rt,Rn,Imm9)
+#  define LDR_A(Rt,Rn,Imm9)            oxxs9(A64_LDR_A,Rt,Rn,Imm9)
 #  define LDAXR(Rt,Rn)                 o_xx(A64_LDAXR,Rt,Rn)
 #  define STLXR(Rs,Rt,Rn)              oxxx(A64_STLXR,Rs,Rn,Rt)
 #  define STRB(Rt,Rn,Rm)               oxxx(A64_STRB,Rt,Rn,Rm)
 #  define STRBI(Rt,Rn,Imm12)           oxxi(A64_STRBI,Rt,Rn,Imm12)
 #  define STURB(Rt,Rn,Imm9)            oxx9(A64_STURB,Rt,Rn,Imm9)
+#  define STRB_B(Rt,Rn,Imm9)           oxxs9(A64_STRB_B,Rt,Rn,Imm9)
+#  define STRB_A(Rt,Rn,Imm9)           oxxs9(A64_STRB_A,Rt,Rn,Imm9)
 #  define STRH(Rt,Rn,Rm)               oxxx(A64_STRH,Rt,Rn,Rm)
 #  define STRHI(Rt,Rn,Imm12)           oxxi(A64_STRHI,Rt,Rn,Imm12)
 #  define STURH(Rt,Rn,Imm9)            oxx9(A64_STURH,Rt,Rn,Imm9)
+#  define STRH_B(Rt,Rn,Imm9)           oxxs9(A64_STRH_B,Rt,Rn,Imm9)
+#  define STRH_A(Rt,Rn,Imm9)           oxxs9(A64_STRH_A,Rt,Rn,Imm9)
 #  define STRW(Rt,Rn,Rm)               oxxx(A64_STRW,Rt,Rn,Rm)
 #  define STRWI(Rt,Rn,Imm12)           oxxi(A64_STRWI,Rt,Rn,Imm12)
 #  define STURW(Rt,Rn,Imm9)            oxx9(A64_STURW,Rt,Rn,Imm9)
+#  define STRW_B(Rt,Rn,Imm9)           oxxs9(A64_STRW_B,Rt,Rn,Imm9)
+#  define STRW_A(Rt,Rn,Imm9)           oxxs9(A64_STRW_A,Rt,Rn,Imm9)
 #  define STR(Rt,Rn,Rm)                        oxxx(A64_STR,Rt,Rn,Rm)
 #  define STRI(Rt,Rn,Imm12)            oxxi(A64_STRI,Rt,Rn,Imm12)
 #  define STUR(Rt,Rn,Imm9)             oxx9(A64_STUR,Rt,Rn,Imm9)
+#  define STR_B(Rt,Rn,Imm9)            oxxs9(A64_STR_B,Rt,Rn,Imm9)
+#  define STR_A(Rt,Rn,Imm9)            oxxs9(A64_STR_A,Rt,Rn,Imm9)
 #  define LDPI(Rt,Rt2,Rn,Simm7)                oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
 #  define STPI(Rt,Rt2,Rn,Simm7)                oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
 #  define LDPI_PRE(Rt,Rt2,Rn,Simm7)    oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
@@ -502,6 +548,8 @@ static void _oxxx(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 static void _oxxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define oxx9(Op,Rd,Rn,Imm9)          _oxx9(_jit,Op,Rd,Rn,Imm9)
 static void _oxx9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define oxxs9(Op,Rd,Rn,Imm9)         _oxxs9(_jit,Op,Rd,Rn,Imm9)
+static void _oxxs9(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ox19(Op,Rd,Simm19)           _ox19(_jit,Op,Rd,Simm19)
 static void _ox19(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define oc19(Op,Cc,Simm19)           _oc19(_jit,Op,Cc,Simm19)
@@ -658,48 +706,41 @@ static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldr_c(r0,r1)                 LDRSBI(r0,r1,0)
 #  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldr_uc(r0,r1)                        _ldr_uc(_jit,r0,r1)
-static void _ldr_uc(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldr_uc(r0,r1)                        LDRBI(r0, r1, 0)
 #  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
 static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldr_s(r0,r1)                 LDRSHI(r0,r1,0)
 #  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldr_us(r0,r1)                        _ldr_us(_jit,r0,r1)
-static void _ldr_us(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldr_us(r0,r1)                        LDRHI(r0, r1, 0)
 #  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
 static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldr_i(r0,r1)                 LDRSWI(r0,r1,0)
 #  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldr_ui(r0,r1)                        _ldr_ui(_jit,r0,r1)
-static void _ldr_ui(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define ldr_ui(r0,r1)                        LDRWI(r0, r1, 0)
 #  define ldi_ui(r0,i0)                        _ldi_ui(_jit,r0,i0)
 static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldr_l(r0,r1)                 LDRI(r0,r1,0)
 static void _ldr_l(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define ldi_l(r0,i0)                 _ldi_l(_jit,r0,i0)
 static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
-static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxr_c(r0,r1,r2)             LDRSB(r0, r1, r2)
 #  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
 static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
-static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxr_uc(r0,r1,r2)            LDRB(r0, r1, r2)
 #  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
 static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldxr_s(r0,r1,r2)             LDRSH(r0,r1,r2)
 #  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
 static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
-static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxr_us(r0,r1,r2)            LDRH(r0, r1, r2)
 #  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldxr_i(r0,r1,r2)             LDRSW(r0,r1,r2)
 #  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldxr_ui(r0,r1,r2)            _ldxr_ui(_jit,r0,r1,r2)
-static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxr_ui(r0,r1,r2)            LDRW(r0, r1, r2)
 #  define ldxi_ui(r0,r1,i0)            _ldxi_ui(_jit,r0,r1,i0)
 static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define ldxr_l(r0,r1,r2)             LDR(r0,r1,r2)
@@ -709,6 +750,48 @@ static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define unldi(r0, i0, i1)            generic_unldi(r0, i0, i1)
 #  define unldr_u(r0, r1, i0)          generic_unldr_u(r0, r1, i0)
 #  define unldi_u(r0, i0, i1)          generic_unldi_u(r0, i0, i1)
+#  define ldxbr_c(r0, r1, r2)          generic_ldxbr_c(r0, r1, r2)
+#  define ldxbi_c(r0, r1, i0)          _ldxbi_c(_jit, r0, r1, i0)
+static void _ldxbi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_uc(r0, r1, r2)         generic_ldxbr_uc(r0, r1, r2)
+#  define ldxbi_uc(r0, r1, i0)         _ldxbi_uc(_jit, r0, r1, i0)
+static void _ldxbi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_s(r0, r1, r2)          generic_ldxbr_s(r0, r1, r2)
+#  define ldxbi_s(r0, r1, i0)          _ldxbi_s(_jit, r0, r1, i0)
+static void _ldxbi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_us(r0, r1, r2)         generic_ldxbr_us(r0, r1, r2)
+#  define ldxbi_us(r0, r1, i0)         _ldxbi_us(_jit, r0, r1, i0)
+static void _ldxbi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_i(r0, r1, r2)          generic_ldxbr_i(r0, r1, r2)
+#  define ldxbi_i(r0, r1, i0)          _ldxbi_i(_jit, r0, r1, i0)
+static void _ldxbi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_ui(r0, r1, r2)         generic_ldxbr_ui(r0, r1, r2)
+#  define ldxbi_ui(r0, r1, i0)         _ldxbi_ui(_jit, r0, r1, i0)
+static void _ldxbi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_l(r0, r1, r2)          generic_ldxbr_l(r0, r1, r2)
+#  define ldxbi_l(r0, r1, i0)          _ldxbi_l(_jit, r0, r1, i0)
+static void _ldxbi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_c(r0, r1, r2)          generic_ldxar_c(r0, r1, r2)
+#  define ldxai_c(r0, r1, i0)          _ldxai_c(_jit, r0, r1, i0)
+static void _ldxai_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_uc(r0, r1, r2)         generic_ldxar_uc(r0, r1, r2)
+#  define ldxai_uc(r0, r1, i0)         _ldxai_uc(_jit, r0, r1, i0)
+static void _ldxai_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_s(r0, r1, r2)          generic_ldxar_s(r0, r1, r2)
+#  define ldxai_s(r0, r1, i0)          _ldxai_s(_jit, r0, r1, i0)
+static void _ldxai_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_us(r0, r1, r2)         generic_ldxar_us(r0, r1, r2)
+#  define ldxai_us(r0, r1, i0)         _ldxai_us(_jit, r0, r1, i0)
+static void _ldxai_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_i(r0, r1, r2)          generic_ldxar_i(r0, r1, r2)
+#  define ldxai_i(r0, r1, i0)          _ldxai_i(_jit, r0, r1, i0)
+static void _ldxai_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_ui(r0, r1, r2)         generic_ldxar_ui(r0, r1, r2)
+#  define ldxai_ui(r0, r1, i0)         _ldxai_ui(_jit, r0, r1, i0)
+static void _ldxai_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_l(r0, r1, r2)          generic_ldxar_l(r0, r1, r2)
+#  define ldxai_l(r0, r1, i0)          _ldxai_l(_jit, r0, r1, i0)
+static void _ldxai_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define str_c(r0,r1)                 STRBI(r1,r0,0)
 #  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
@@ -735,6 +818,30 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define unstr(r0, r1, i0)            generic_unstr(r0, r1, i0)
 #  define unsti(i0, r0, i1)            generic_unsti(i0, r0, i1)
+#  define stxbr_c(r0,r1,r2)            generic_stxbr_c(r0,r1,r2)
+#  define stxbi_c(i0,r0,r1)            _stxbi_c(_jit,i0,r0,r1)
+static void _stxbi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_s(r0,r1,r2)            generic_stxbr_s(r0,r1,r2)
+#  define stxbi_s(i0,r0,r1)            _stxbi_s(_jit,i0,r0,r1)
+static void _stxbi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_i(r0,r1,r2)            generic_stxbr_i(r0,r1,r2)
+#  define stxbi_i(i0,r0,r1)            _stxbi_i(_jit,i0,r0,r1)
+static void _stxbi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_l(r0,r1,r2)            generic_stxbr_l(r0,r1,r2)
+#  define stxbi_l(i0,r0,r1)            _stxbi_l(_jit,i0,r0,r1)
+static void _stxbi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_c(r0,r1,r2)            generic_stxar_c(r0,r1,r2)
+#  define stxai_c(i0,r0,r1)            _stxai_c(_jit,i0,r0,r1)
+static void _stxai_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_s(r0,r1,r2)            generic_stxar_s(r0,r1,r2)
+#  define stxai_s(i0,r0,r1)            _stxai_s(_jit,i0,r0,r1)
+static void _stxai_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_i(r0,r1,r2)            generic_stxar_i(r0,r1,r2)
+#  define stxai_i(i0,r0,r1)            _stxai_i(_jit,i0,r0,r1)
+static void _stxai_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_l(r0,r1,r2)            generic_stxar_l(r0,r1,r2)
+#  define stxai_l(i0,r0,r1)            _stxai_l(_jit,i0,r0,r1)
+static void _stxai_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
@@ -937,6 +1044,22 @@ _oxx9(jit_state_t *_jit, jit_int32_t Op,
     ii(i.w);
 }
 
+static void /* Build one instruction word from Op with the Rd, Rn and imm9 fields filled in, then emit it. */
+_oxxs9(jit_state_t *_jit, jit_int32_t Op,
+      jit_int32_t Rd, jit_int32_t Rn, jit_int32_t Imm9)
+{
+    instr_t    i;
+    assert(!(Rd   &       ~0x1f)); /* register numbers must fit in 5 bits */
+    assert(!(Rn   &       ~0x1f));
+    assert(s9_p(Imm9)); /* immediate must pass the s9_p() range check */
+    assert(!(Op   & ~0xffe00c00)); /* Op may only carry bits inside mask 0xffe00c00 */
+    i.w = Op;
+    i.Rd.b = Rd;
+    i.Rn.b = Rn;
+    i.imm9.b = Imm9;
+    ii(i.w); /* hand the finished word to ii() */
+}
+
 static void
 _ox19(jit_state_t *_jit, jit_int32_t Op, jit_int32_t Rd, jit_int32_t Simm19)
 {
@@ -1837,15 +1960,6 @@ _ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_unget_reg(reg);
 }
 
-static void
-_ldr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    LDRBI(r0, r1, 0);
-#if 0
-    extr_uc(r0, r0);
-#endif
-}
-
 static void
 _ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -1866,15 +1980,6 @@ _ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_unget_reg(reg);
 }
 
-static void
-_ldr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    LDRHI(r0, r1, 0);
-#if 0
-    extr_us(r0, r0);
-#endif
-}
-
 static void
 _ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -1895,15 +2000,6 @@ _ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_unget_reg(reg);
 }
 
-static void
-_ldr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
-{
-    LDRWI(r0, r1, 0);
-#if 0
-    extr_ui(r0, r0);
-#endif
-}
-
 static void
 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -1924,13 +2020,6 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_unget_reg(reg);
 }
 
-static void
-_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
-{
-    LDRSB(r0, r1, r2);
-    extr_c(r0, r0);
-}
-
 static void
 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -1945,16 +2034,6 @@ _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LDRSB(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
-    extr_c(r0, r0);
-}
-
-static void
-_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
-{
-    LDRB(r0, r1, r2);
-#if 0
-    extr_uc(r0, r0);
-#endif
 }
 
 static void
@@ -1971,9 +2050,6 @@ _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        ldr_uc(r0, rn(reg));
        jit_unget_reg(reg);
     }
-#if 0
-    extr_uc(r0, r0);
-#endif
 }
 
 static void
@@ -1992,15 +2068,6 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
-static void
-_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
-{
-    LDRH(r0, r1, r2);
-#if 0
-    extr_us(r0, r0);
-#endif
-}
-
 static void
 _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2015,9 +2082,6 @@ _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LDRH(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
-#if 0
-    extr_us(r0, r0);
-#endif
 }
 
 static void
@@ -2036,15 +2100,6 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
-static void
-_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
-{
-    LDRW(r0, r1, r2);
-#if 0
-    extr_ui(r0, r0);
-#endif
-}
-
 static void
 _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2059,9 +2114,6 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LDRW(r0, r1, rn(reg));
        jit_unget_reg(reg);
     }
-#if 0
-    extr_ui(r0, r0);
-#endif
 }
 
 static void
@@ -2080,6 +2132,104 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+static void
+_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_c(r0, r1, i0). NOTE(review): "_B" presumably means the base is updated before the access — confirm against the LDRSB_B opcode. */
+    if (s9_p(i0))      LDRSB_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_c(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_c(r0, r1, i0). NOTE(review): "_A" presumably means the base is updated after the access — confirm against the LDRSB_A opcode. */
+    if (s9_p(i0))      LDRSB_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_c(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_uc(r0, r1, i0): LDRB_B when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRB_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_uc(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_uc(r0, r1, i0): LDRB_A when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRB_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_uc(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_s(r0, r1, i0): LDRSH_B when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRSH_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_s(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_s(r0, r1, i0): LDRSH_A when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRSH_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_s(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_us(r0, r1, i0): LDRH_B when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRH_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_us(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_us(r0, r1, i0): LDRH_A when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRH_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_us(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_i(r0, r1, i0): LDRSW_B when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRSW_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_i(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_i(r0, r1, i0): LDRSW_A when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRSW_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_i(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxbi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_ui(r0, r1, i0): LDRW_B when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRW_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_ui(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_ui(r0, r1, i0): LDRW_A when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDRW_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_ui(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxbi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_l(r0, r1, i0): LDR_B when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDR_B(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxbi_l(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
+static void
+_ldxai_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_l(r0, r1, i0): LDR_A when the offset is encodable, generic fallback otherwise. */
+    if (s9_p(i0))      LDR_A(r0, r1, i0); /* offset passes s9_p(): one instruction */
+    else               generic_ldxai_l(r0, r1, i0); /* otherwise defer to the generic fallback */
+}
+
 static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -2184,6 +2334,62 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     }
 }
 
+static void
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxbi_c(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STRB_B. */
+    if (s9_p(i0))      STRB_B(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxbi_c(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxbi_c parameter order */
+}
+
+static void
+_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxai_c(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STRB_A. */
+    if (s9_p(i0))      STRB_A(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxai_c(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxai_c parameter order */
+}
+
+static void
+_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxbi_s(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STRH_B. */
+    if (s9_p(i0))      STRH_B(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxbi_s(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxbi_s parameter order */
+}
+
+static void
+_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxai_s(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STRH_A. */
+    if (s9_p(i0))      STRH_A(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxai_s(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxai_s parameter order */
+}
+
+static void
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxbi_i(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STRW_B. */
+    if (s9_p(i0))      STRW_B(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxbi_i(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxbi_i parameter order */
+}
+
+static void
+_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxai_i(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STRW_A. */
+    if (s9_p(i0))      STRW_A(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxai_i(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxai_i parameter order */
+}
+
+static void
+_stxbi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxbi_l(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STR_B. */
+    if (s9_p(i0))      STR_B(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxbi_l(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxbi_l parameter order */
+}
+
+static void
+_stxai_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxai_l(i0, r0, r1): r1 is passed as Rt and r0 as Rn of STR_A. */
+    if (s9_p(i0))      STR_A(r1, r0, i0); /* offset passes s9_p(): one instruction */
+    else               generic_stxai_l(r0, r1, i0); /* NOTE(review): arguments are (r0, r1, i0) while this function takes (i0, r0, r1) — confirm the generic_stxai_l parameter order */
+}
+
 static void
 _casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
       jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
index 51f40ce..9e8c9ca 100644 (file)
 #  define A64_FDIV                     0x1e201800
 #  define A64_FADD                     0x1e202800
 #  define A64_FSUB                     0x1e203800
+#  define A64_FLDSTR                   0x3c206800 /* base opcode for the register-offset forms emitted through vldstr() */
+#  define A64_FLDSTU                   0x3d000000 /* base opcode for the 12-bit immediate forms emitted through vldstu() */
+#  define A64_FLDST                    0x3c000000 /* base opcode for the 9-bit immediate forms emitted through vldst() */
+#  define A64_FLDST_A                  0x3c000400 /* 9-bit immediate, "_A" variant (used by the FLDR*_A / FSTR*_A macros) */
+#  define A64_FLDST_B                  0x3c000c00 /* 9-bit immediate, "_B" variant (used by the FLDR*_B / FSTR*_B macros) */
+#  define FLDRB(Rt,Rn,Rm)              vldstr(0,A64_FLDSTR,1,Rm,Rn,Rt)
+#  define FLDRH(Rt,Rn,Rm)              vldstr(1,A64_FLDSTR,1,Rm,Rn,Rt)
+#  define FLDRS(Rt,Rn,Rm)              vldstr(2,A64_FLDSTR,1,Rm,Rn,Rt)
+#  define FLDRD(Rt,Rn,Rm)              vldstr(3,A64_FLDSTR,1,Rm,Rn,Rt)
+#  define FLDRQ(Rt,Rn,Rm)              vldstr(0,A64_FLDSTR,3,Rm,Rn,Rt)
+#  define FSTRB(Rt,Rn,Rm)              vldstr(0,A64_FLDSTR,0,Rm,Rn,Rt)
+#  define FSTRH(Rt,Rn,Rm)              vldstr(1,A64_FLDSTR,0,Rm,Rn,Rt)
+#  define FSTRS(Rt,Rn,Rm)              vldstr(2,A64_FLDSTR,0,Rm,Rn,Rt)
+#  define FSTRD(Rt,Rn,Rm)              vldstr(3,A64_FLDSTR,0,Rm,Rn,Rt)
+#  define FSTRQ(Rt,Rn,Rm)              vldstr(0,A64_FLDSTR,2,Rm,Rn,Rt)
+#  define FLDRBI(Rt,Rn,Imm9)           vldst(0,A64_FLDST,1,Imm9,Rn,Rt)
+#  define FLDRHI(Rt,Rn,Imm9)           vldst(1,A64_FLDST,1,Imm9,Rn,Rt)
+#  define FLDRSI(Rt,Rn,Imm9)           vldst(2,A64_FLDST,1,Imm9,Rn,Rt)
+#  define FLDRDI(Rt,Rn,Imm9)           vldst(3,A64_FLDST,1,Imm9,Rn,Rt)
+#  define FLDRQI(Rt,Rn,Imm9)           vldst(0,A64_FLDST,3,Imm9,Rn,Rt)
+#  define FLDRB_B(Rt,Rn,Imm9)          vldst(0,A64_FLDST_B,1,Imm9,Rn,Rt)
+#  define FLDRH_B(Rt,Rn,Imm9)          vldst(1,A64_FLDST_B,1,Imm9,Rn,Rt)
+#  define FLDRS_B(Rt,Rn,Imm9)          vldst(2,A64_FLDST_B,1,Imm9,Rn,Rt)
+#  define FLDRD_B(Rt,Rn,Imm9)          vldst(3,A64_FLDST_B,1,Imm9,Rn,Rt)
+#  define FLDRQ_B(Rt,Rn,Imm9)          vldst(0,A64_FLDST_B,3,Imm9,Rn,Rt)
+#  define FLDRB_A(Rt,Rn,Imm9)          vldst(0,A64_FLDST_A,1,Imm9,Rn,Rt)
+#  define FLDRH_A(Rt,Rn,Imm9)          vldst(1,A64_FLDST_A,1,Imm9,Rn,Rt)
+#  define FLDRS_A(Rt,Rn,Imm9)          vldst(2,A64_FLDST_A,1,Imm9,Rn,Rt)
+#  define FLDRD_A(Rt,Rn,Imm9)          vldst(3,A64_FLDST_A,1,Imm9,Rn,Rt)
+#  define FLDRQ_A(Rt,Rn,Imm9)          vldst(0,A64_FLDST_A,3,Imm9,Rn,Rt)
+#  define FSTRBI(Rt,Rn,Imm9)           vldst(0,A64_FLDST,0,Imm9,Rn,Rt)
+#  define FSTRHI(Rt,Rn,Imm9)           vldst(1,A64_FLDST,0,Imm9,Rn,Rt)
+#  define FSTRSI(Rt,Rn,Imm9)           vldst(2,A64_FLDST,0,Imm9,Rn,Rt)
+#  define FSTRDI(Rt,Rn,Imm9)           vldst(3,A64_FLDST,0,Imm9,Rn,Rt)
+#  define FSTRQI(Rt,Rn,Imm9)           vldst(0,A64_FLDST,2,Imm9,Rn,Rt)
+#  define FSTRB_B(Rt,Rn,Imm9)          vldst(0,A64_FLDST_B,0,Imm9,Rn,Rt)
+#  define FSTRH_B(Rt,Rn,Imm9)          vldst(1,A64_FLDST_B,0,Imm9,Rn,Rt)
+#  define FSTRS_B(Rt,Rn,Imm9)          vldst(2,A64_FLDST_B,0,Imm9,Rn,Rt)
+#  define FSTRD_B(Rt,Rn,Imm9)          vldst(3,A64_FLDST_B,0,Imm9,Rn,Rt)
+#  define FSTRQ_B(Rt,Rn,Imm9)          vldst(0,A64_FLDST_B,2,Imm9,Rn,Rt)
+#  define FSTRB_A(Rt,Rn,Imm9)          vldst(0,A64_FLDST_A,0,Imm9,Rn,Rt)
+#  define FSTRH_A(Rt,Rn,Imm9)          vldst(1,A64_FLDST_A,0,Imm9,Rn,Rt)
+#  define FSTRS_A(Rt,Rn,Imm9)          vldst(2,A64_FLDST_A,0,Imm9,Rn,Rt)
+#  define FSTRD_A(Rt,Rn,Imm9)          vldst(3,A64_FLDST_A,0,Imm9,Rn,Rt)
+#  define FSTRQ_A(Rt,Rn,Imm9)          vldst(0,A64_FLDST_A,2,Imm9,Rn,Rt)
+#  define FLDRBU(Rt,Rn,Imm12)          vldstu(0,A64_FLDSTU,1,Imm12,Rn,Rt)
+#  define FLDRHU(Rt,Rn,Imm12)          vldstu(1,A64_FLDSTU,1,Imm12,Rn,Rt)
+#  define FLDRSU(Rt,Rn,Imm12)          vldstu(2,A64_FLDSTU,1,Imm12,Rn,Rt)
+#  define FLDRDU(Rt,Rn,Imm12)          vldstu(3,A64_FLDSTU,1,Imm12,Rn,Rt)
+#  define FLDRQU(Rt,Rn,Imm12)          vldstu(0,A64_FLDSTU,3,Imm12,Rn,Rt)
+#  define FSTRBU(Rt,Rn,Imm12)          vldstu(0,A64_FLDSTU,0,Imm12,Rn,Rt)
+#  define FSTRHU(Rt,Rn,Imm12)          vldstu(1,A64_FLDSTU,0,Imm12,Rn,Rt)
+#  define FSTRSU(Rt,Rn,Imm12)          vldstu(2,A64_FLDSTU,0,Imm12,Rn,Rt)
+#  define FSTRDU(Rt,Rn,Imm12)          vldstu(3,A64_FLDSTU,0,Imm12,Rn,Rt)
+#  define FSTRQU(Rt,Rn,Imm12)          vldstu(0,A64_FLDSTU,2,Imm12,Rn,Rt)
 #  define CNT(Rd,Rn)                   vqo_vv(0,A64_CNT,Rn,Rd)
 #  define ADDV(Rd,Rn)                  vqo_vv(0,A64_ADDV,Rn,Rd)
 #  define FCMPES(Rn,Rm)                        os_vv(A64_FCMPE,0,Rn,Rm)
@@ -101,6 +156,15 @@ static void _os_vv(jit_state_t*,jit_int32_t,
 #  define vqo_vv(Q,Op,Rn,Rd)           _vqo_vv(_jit,Q,Op,Rn,Rd)
 static void _vqo_vv(jit_state_t*,jit_int32_t,
                    jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vldstr(size,Op,opc,Rm,Rn,Rt) _vldstr(_jit,size,Op,opc,Rm,Rn,Rt)
+static void _vldstr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vldst(size,Op,opc,Imm9,Rn,Rt)        _vldst(_jit,size,Op,opc,Imm9,Rn,Rt)
+static void _vldst(jit_state_t*,jit_int32_t,jit_int32_t,
+                  jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define vldstu(size,Op,opc,Imm12,Rn,Rt) _vldstu(_jit,size,Op,opc,Imm12,Rn,Rt)
+static void _vldstu(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define popcntr(r0,r1)               _popcntr(_jit,r0,r1);
 static void _popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define truncr_f_i(r0,r1)            _truncr_f_i(_jit,r0,r1)
@@ -140,6 +204,12 @@ static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
 static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
 static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_f(r0,r1,r2)            generic_ldxbr_f(r0,r1,r2)
+#  define ldxbi_f(r0,r1,i0)            _ldxbi_f(_jit,r0,r1,i0)
+static void _ldxbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_f(r0,r1,r2)            generic_ldxar_f(r0,r1,r2)
+#  define ldxai_f(r0,r1,i0)            _ldxai_f(_jit,r0,r1,i0)
+static void _ldxai_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define unldr_x(r0, r1, i0)          generic_unldr_x(r0, r1, i0)
 #  define unldi_x(r0, i0, i1)          generic_unldi_x(r0, i0, i1)
 #  define str_f(r0,r1)                 _str_f(_jit,r0,r1)
@@ -150,6 +220,12 @@ static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
 static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_f(r0,r1,r2)            generic_stxbr_f(r0,r1,r2)
+#  define stxbi_f(i0,r0,r1)            _stxbi_f(_jit,i0,r0,r1)
+static void _stxbi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_f(r0,r1,r2)            generic_stxar_f(r0,r1,r2)
+#  define stxai_f(i0,r0,r1)            _stxai_f(_jit,i0,r0,r1)
+static void _stxai_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define unstr_x(r0, r1, i0)          generic_unstr_x(r0, r1, i0)
 #  define unsti_x(i0, r0, i1)          generic_unsti_x(i0, r0, i1)
 #  define movr_f(r0,r1)                        _movr_f(_jit,r0,r1)
@@ -267,6 +343,12 @@ static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
 static void _ldxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
 static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_d(r0,r1,r2)            generic_ldxbr_d(r0,r1,r2)
+#  define ldxbi_d(r0,r1,i0)            _ldxbi_d(_jit,r0,r1,i0)
+static void _ldxbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_d(r0,r1,r2)            generic_ldxar_d(r0,r1,r2)
+#  define ldxai_d(r0,r1,i0)            _ldxai_d(_jit,r0,r1,i0)
+static void _ldxai_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define str_d(r0,r1)                 _str_d(_jit,r0,r1)
 static void _str_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
@@ -275,6 +357,12 @@ static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
 static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_d(r0,r1,r2)            generic_stxbr_d(r0,r1,r2)
+#  define stxbi_d(i0,r0,r1)            _stxbi_d(_jit,i0,r0,r1)
+static void _stxbi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_d(r0,r1,r2)            generic_stxar_d(r0,r1,r2)
+#  define stxai_d(i0,r0,r1)            _stxai_d(_jit,i0,r0,r1)
+static void _stxai_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
 static void _movr_d(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)
@@ -450,6 +538,69 @@ _vqo_vv(jit_state_t *_jit, jit_int32_t Q,
     ii(i.w);
 }
 
+static void /* Build and emit an FP load/store word with a register offset (backs the FLDR{B,H,S,D,Q} / FSTR{B,H,S,D,Q} macros). */
+_vldstr(jit_state_t *_jit, jit_int32_t ldst_size,
+       jit_int32_t Op, jit_int32_t opc, jit_int32_t Rm,
+       jit_int32_t Rn, jit_int32_t Rt)
+{
+    instr_t    i;
+    assert(!(Rm &       ~0x1f)); /* register numbers must fit in 5 bits */
+    assert(!(Rn &       ~0x1f));
+    assert(!(Rt &       ~0x1f));
+    assert(!(opc &       ~0x3)); /* opc and ldst_size are 2-bit fields */
+    assert(!(ldst_size & ~0x3));
+    assert(!(Op & ~0x3f20fc00)); /* Op may only carry bits inside this mask */
+    i.w = Op;
+    i.ldst_size.b = ldst_size;
+    i.opc.b = opc;
+    i.Rm.b = Rm;
+    i.Rn.b = Rn;
+    i.Rt.b = Rt;
+    ii(i.w); /* hand the finished word to ii() */
+}
+
+static void /* Build and emit an FP load/store word with a 9-bit immediate (backs the FLDR*I, FLDR*_A/_B, FSTR*I and FSTR*_A/_B macros). */
+_vldst(jit_state_t *_jit, jit_int32_t ldst_size,
+       jit_int32_t Op, jit_int32_t opc, jit_int32_t Imm9,
+       jit_int32_t Rn, jit_int32_t Rt)
+{
+    instr_t    i;
+    assert(!(Rn &       ~0x1f)); /* register numbers must fit in 5 bits */
+    assert(!(Rt &       ~0x1f));
+    assert(!(opc &       ~0x3)); /* opc and ldst_size are 2-bit fields */
+    assert(s9_p(Imm9)); /* immediate must pass the s9_p() range check */
+    assert(!(ldst_size & ~0x3));
+    assert(!(Op & ~0x3f200c00)); /* Op may only carry bits inside this mask */
+    i.w = Op;
+    i.ldst_size.b = ldst_size;
+    i.opc.b = opc;
+    i.imm9.b = Imm9;
+    i.Rn.b = Rn;
+    i.Rt.b = Rt;
+    ii(i.w); /* hand the finished word to ii() */
+}
+
+static void /* Build and emit an FP load/store word with a 12-bit immediate (backs the FLDR*U / FSTR*U macros). */
+_vldstu(jit_state_t *_jit, jit_int32_t ldst_size,
+       jit_int32_t Op, jit_int32_t opc, jit_int32_t Imm12,
+       jit_int32_t Rn, jit_int32_t Rt)
+{
+    instr_t    i;
+    assert(!(Rn &       ~0x1f)); /* register numbers must fit in 5 bits */
+    assert(!(Rt &       ~0x1f));
+    assert(!(opc &       ~0x3)); /* opc and ldst_size are 2-bit fields */
+    assert(u12_p(Imm12)); /* immediate must pass the u12_p() range check */
+    assert(!(ldst_size & ~0x3));
+    assert(!(Op & ~0x3f000000)); /* Op may only carry bits inside this mask */
+    i.w = Op;
+    i.ldst_size.b = ldst_size;
+    i.opc.b = opc;
+    i.imm12.b = Imm12; /* stored verbatim, no scaling here. NOTE(review): the A64 unsigned-offset form scales imm12 by the access size, so callers must pass a pre-scaled value — confirm against the Arm ARM */
+    i.Rn.b = Rn;
+    i.Rt.b = Rt;
+    ii(i.w); /* hand the finished word to ii() */
+}
+
 #define fopi(name)                                                     \
 static void                                                            \
 _##name##i_f(jit_state_t *_jit,                                                \
@@ -532,11 +683,15 @@ fopi(div)
 static void
 _ldr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if 1 /* active path: load straight into the FP register */
+    FLDRSU(r0, r1, 0); /* 12-bit-immediate form with a zero immediate; no GPR temporary */
+#else /* previous implementation, kept for reference: load into a GPR, then FMOVSW into r0 */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     ldr_i(rn(reg), r1);
     FMOVSW(r0, rn(reg));
     jit_unget_reg(reg);
+#endif
 }
 
 static void
@@ -544,39 +699,107 @@ _ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
+#if 1
+    movi(rn(reg), i0);
+    ldr_f(r0, rn(reg));
+#else
     ldi_i(rn(reg), i0);
     FMOVSW(r0, rn(reg));
+#endif
     jit_unget_reg(reg);
 }
 
 static void
 _ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if 1 /* active path: register-offset FP load */
+    FLDRS(r0, r1, r2); /* r0 is Rt, r1 is Rn, r2 is Rm; no GPR temporary */
+#else /* previous implementation, kept for reference: ldxr_i into a GPR, then FMOVSW into r0 */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     ldxr_i(rn(reg), r1, r2);
     FMOVSW(r0, rn(reg));
     jit_unget_reg(reg);
+#endif
 }
 
 static void
 _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
+#if 1
+    if (s9_p(i0))
+       FLDRSI(r0, r1, i0);
+    else if (u12_p(i0))
+       FLDRSU(r0, r1, i0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_f(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#else
     reg = jit_get_reg(jit_class_gpr);
     ldxi_i(rn(reg), r1, i0);
     FMOVSW(r0, rn(reg));
     jit_unget_reg(reg);
+#endif
+}
+
+static void
+_ldxbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxbi_f(r0, r1, i0). */
+    jit_int32_t                reg;
+#if 1
+    if (s9_p(i0)) /* offset passes s9_p(): one FLDRS_B instruction */
+       FLDRS_B(r0, r1, i0);
+    else { /* otherwise put the offset in a scratch GPR and use the register variant */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_f(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#else /* disabled alternative: integer ldxbi_i into a GPR, then FMOVSW into r0 */
+    reg = jit_get_reg(jit_class_gpr);
+    ldxbi_i(rn(reg), r1, i0);
+    FMOVSW(r0, rn(reg));
+    jit_unget_reg(reg);
+#endif
+}
+
+static void
+_ldxai_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{ /* Backend for ldxai_f(r0, r1, i0). */
+    jit_int32_t                reg;
+#if 1
+    if (s9_p(i0)) /* offset passes s9_p(): one FLDRS_A instruction */
+       FLDRS_A(r0, r1, i0);
+    else { /* otherwise put the offset in a scratch GPR and use the register variant */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxar_f(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#else /* disabled alternative: integer ldxai_i into a GPR, then FMOVSW into r0 */
+    reg = jit_get_reg(jit_class_gpr);
+    ldxai_i(rn(reg), r1, i0);
+    FMOVSW(r0, rn(reg));
+    jit_unget_reg(reg);
+#endif
 }
 
 static void
 _str_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if 1 /* active path: store straight from the FP register */
+    FSTRSU(r1, r0, 0); /* r1 is Rt (value), r0 is Rn (address); zero immediate, no GPR temporary */
+#else /* previous implementation, kept for reference: FMOVWS into a GPR, then str_i */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     FMOVWS(rn(reg), r1);
     str_i(r0, rn(reg));
     jit_unget_reg(reg);
+#endif
 }
 
 static void
@@ -584,29 +807,93 @@ _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
+#if 1
+    movi(rn(reg), i0);
+    str_f(rn(reg), r0);
+#else
     FMOVWS(rn(reg), r0);
     sti_i(i0, rn(reg));
+#endif
     jit_unget_reg(reg);
 }
 
 static void
 _stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if 1 /* active path: register-offset FP store */
+    FSTRS(r2, r1, r0); /* r2 is Rt (value), r1 is Rn, r0 is Rm; no GPR temporary */
+#else /* previous implementation, kept for reference: FMOVWS into a GPR, then stxr_i */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     FMOVWS(rn(reg), r2);
     stxr_i(r0, r1, rn(reg));
     jit_unget_reg(reg);
+#endif
 }
 
 static void
 _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+#if 1
+    if (s9_p(i0))
+       FSTRSI(r1, r0, i0);
+    else if (u12_p(i0))
+       FSTRSU(r1, r0, i0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_f(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+#else
     reg = jit_get_reg(jit_class_gpr);
     FMOVWS(rn(reg), r1);
     stxi_i(i0, r0, rn(reg));
     jit_unget_reg(reg);
+#endif
+}
+
+static void
+_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxbi_f(i0, r0, r1): r1 is passed as Rt and r0 as Rn. */
+    jit_int32_t                reg;
+#if 1
+    if (s9_p(i0)) /* offset passes s9_p(): one FSTRS_B instruction */
+       FSTRS_B(r1, r0, i0);
+    else { /* otherwise put the offset in a scratch GPR and use the register variant */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_f(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+#else /* disabled alternative: FMOVWS into a GPR, then integer stxbi_i */
+    reg = jit_get_reg(jit_class_gpr);
+    FMOVWS(rn(reg), r1);
+    stxbi_i(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+#endif
+}
+
+static void
+_stxai_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{ /* Backend for stxai_f(i0, r0, r1): r1 is passed as Rt and r0 as Rn. */
+    jit_int32_t                reg;
+#if 1
+    if (s9_p(i0)) /* offset passes s9_p(): one FSTRS_A instruction */
+       FSTRS_A(r1, r0, i0);
+    else { /* otherwise put the offset in a scratch GPR and use the register variant */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxar_f(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+#else /* disabled alternative: FMOVWS into a GPR, then integer stxai_i */
+    reg = jit_get_reg(jit_class_gpr);
+    FMOVWS(rn(reg), r1);
+    stxai_i(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+#endif
 }
 
 static void
@@ -759,11 +1046,15 @@ dopi(div)
 static void
 _ldr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if 1 /* enabled: FLDRDU(r0, r1, 0) with a constant 0 as the third operand */
+    FLDRDU(r0, r1, 0);
+#else /* disabled: ldr_l from r1 into a scratch GPR, then FMOVDX into r0 */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     ldr_l(rn(reg), r1);
     FMOVDX(r0, rn(reg));
     jit_unget_reg(reg);
+#endif /* 1 */
 }
 
 static void
@@ -771,39 +1062,107 @@ _ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
+#if 1
+    movi(rn(reg), i0);
+    ldr_d(r0, rn(reg));
+#else
     ldi_l(rn(reg), i0);
     FMOVDX(r0, rn(reg));
+#endif
     jit_unget_reg(reg);
 }
 
 static void
 _ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if 1 /* enabled: a single FLDRD(r0, r1, r2); no scratch register is requested */
+    FLDRD(r0, r1, r2);
+#else /* disabled: ldxr_l(gpr, r1, r2) into a scratch GPR, then FMOVDX into r0 */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     ldxr_l(rn(reg), r1, r2);
     FMOVDX(r0, rn(reg));
     jit_unget_reg(reg);
+#endif /* 1 */
 }
 
 static void
 _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
+#if 1 /* enabled: choose FLDRDI, FLDRDU or ldxr_d from the s9_p/u12_p tests on i0 */
+    if (s9_p(i0))
+       FLDRDI(r0, r1, i0);
+    else if (u12_p(i0)) /* NOTE(review): i0 is passed unscaled; confirm FLDRDU's offset encoding */
+       FLDRDU(r0, r1, i0);
+    else { /* i0 rejected by both predicates: movi it into a scratch GPR, use ldxr_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxr_d(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#else /* disabled: ldxi_l(gpr, r1, i0) into a scratch GPR, then FMOVDX into r0 */
     reg = jit_get_reg(jit_class_gpr);
     ldxi_l(rn(reg), r1, i0);
     FMOVDX(r0, rn(reg));
     jit_unget_reg(reg);
+#endif /* 1 */
+}
+
+static void
+_ldxbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+#if 1 /* enabled: FLDRD_B when s9_p(i0) holds, otherwise fall back to ldxbr_d */
+    if (s9_p(i0))
+       FLDRD_B(r0, r1, i0);
+    else { /* i0 rejected by s9_p: movi it into a scratch GPR for ldxbr_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_d(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#else /* disabled: ldxbi_l(gpr, r1, i0) into a scratch GPR, then FMOVDX into r0 */
+    reg = jit_get_reg(jit_class_gpr);
+    ldxbi_l(rn(reg), r1, i0);
+    FMOVDX(r0, rn(reg));
+    jit_unget_reg(reg);
+#endif /* 1 */
+}
+
+static void
+_ldxai_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+#if 1 /* enabled: FLDRD_A when s9_p(i0) holds, otherwise fall back to ldxar_d */
+    if (s9_p(i0))
+       FLDRD_A(r0, r1, i0);
+    else { /* i0 rejected by s9_p: movi it into a scratch GPR for ldxar_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxar_d(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+#else /* disabled: ldxai_l(gpr, r1, i0) into a scratch GPR, then FMOVDX into r0 */
+    reg = jit_get_reg(jit_class_gpr);
+    ldxai_l(rn(reg), r1, i0);
+    FMOVDX(r0, rn(reg));
+    jit_unget_reg(reg);
+#endif /* 1 */
 }
 
 static void
 _str_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if 1 /* enabled: FSTRDU(r1, r0, 0) with a constant 0 as the third operand */
+    FSTRDU(r1, r0, 0);
+#else /* disabled: FMOVXD r1 into a scratch GPR, then str_l(r0, gpr) */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     FMOVXD(rn(reg), r1);
     str_l(r0, rn(reg));
     jit_unget_reg(reg);
+#endif /* 1 */
 }
 
 static void
@@ -811,29 +1170,93 @@ _sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
+#if 1
+    movi(rn(reg), i0);
+    str_d(rn(reg), r0);
+#else
     FMOVXD(rn(reg), r0);
     sti_l(i0, rn(reg));
+#endif
     jit_unget_reg(reg);
 }
 
 static void
 _stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if 1 /* enabled: a single FSTRD(r2, r1, r0); no scratch register is requested */
+    FSTRD(r2, r1, r0);
+#else /* disabled: FMOVXD r2 into a scratch GPR, then stxr_l(r0, r1, gpr) */
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     FMOVXD(rn(reg), r2);
     stxr_l(r0, r1, rn(reg));
     jit_unget_reg(reg);
+#endif /* 1 */
 }
 
 static void
 _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
+#if 1 /* enabled: choose FSTRDI, FSTRDU or stxr_d from the s9_p/u12_p tests on i0 */
+    if (s9_p(i0))
+       FSTRDI(r1, r0, i0);
+    else if (u12_p(i0)) /* NOTE(review): i0 is passed unscaled; confirm FSTRDU's offset encoding */
+       FSTRDU(r1, r0, i0);
+    else { /* i0 rejected by both predicates: movi it into a scratch GPR, use stxr_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxr_d(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+#else /* disabled: FMOVXD r1 into a scratch GPR, then stxi_l(i0, r0, gpr) */
     reg = jit_get_reg(jit_class_gpr);
     FMOVXD(rn(reg), r1);
     stxi_l(i0, r0, rn(reg));
     jit_unget_reg(reg);
+#endif /* 1 */
+}
+
+static void
+_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+#if 1 /* enabled: FSTRD_B when s9_p(i0) holds, otherwise fall back to stxbr_d */
+    if (s9_p(i0))
+       FSTRD_B(r1, r0, i0);
+    else { /* i0 rejected by s9_p: movi it into a scratch GPR for stxbr_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_d(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+#else /* disabled: FMOVXD r1 into a scratch GPR, then stxbi_l(i0, r0, gpr) */
+    reg = jit_get_reg(jit_class_gpr);
+    FMOVXD(rn(reg), r1);
+    stxbi_l(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+#endif /* 1 */
+}
+
+static void
+_stxai_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+#if 1 /* enabled: FSTRD_A when s9_p(i0) holds, otherwise fall back to stxar_d */
+    if (s9_p(i0))
+       FSTRD_A(r1, r0, i0);
+    else { /* i0 rejected by s9_p: movi it into a scratch GPR for stxar_d */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxar_d(rn(reg), r0, r1);
+       jit_unget_reg(reg);
+    }
+#else /* disabled: FMOVXD r1 into a scratch GPR, then stxai_l(i0, r0, gpr) */
+    reg = jit_get_reg(jit_class_gpr);
+    FMOVXD(rn(reg), r1);
+    stxai_l(i0, r0, rn(reg));
+    jit_unget_reg(reg);
+#endif /* 1 */
 }
 
 static void
index 435bbe9..9047702 100644 (file)
@@ -1,7 +1,7 @@
 
 #if __WORDSIZE == 64
 #  if PACKED_STACK
-#define JIT_INSTR_MAX 96
+#define JIT_INSTR_MAX 64
     0, /* data */
     0, /* live */
     4, /* align */
@@ -11,7 +11,7 @@
     0, /* #name */
     0, /* #note */
     0, /* label */
-    96,        /* prolog */
+    64,        /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
@@ -43,7 +43,7 @@
     0, /* putargi_l */
     4, /* va_start */
     8, /* va_arg */
-    12,        /* va_arg_d */
+    8, /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     20,        /* addi */
     8, /* movnr */
     8, /* movzr */
     28,        /* casr */
-    36,        /* casi */
+    40,        /* casi */
     4, /* extr_c */
     4, /* exti_c */
     4, /* extr_uc */
     16,        /* ldi_ui */
     4, /* ldr_l */
     16,        /* ldi_l */
-    8, /* ldxr_c */
-    20,        /* ldxi_c */
+    4, /* ldxr_c */
+    16,        /* ldxi_c */
     4, /* ldxr_uc */
     20,        /* ldxi_uc */
     4, /* ldxr_s */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
-    96,        /* epilog */
+    64,        /* epilog */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     4, /* extr_d_f */
     4, /* movr_f */
     8, /* movi_f */
-    8, /* ldr_f */
-    20,        /* ldi_f */
-    8, /* ldxr_f */
-    24,        /* ldxi_f */
-    8, /* str_f */
-    20,        /* sti_f */
-    8, /* stxr_f */
-    24,        /* stxi_f */
+    4, /* ldr_f */
+    16,        /* ldi_f */
+    4, /* ldxr_f */
+    16,        /* ldxi_f */
+    4, /* str_f */
+    16,        /* sti_f */
+    4, /* stxr_f */
+    16,        /* stxi_f */
     8, /* bltr_f */
     16,        /* blti_f */
     8, /* bler_f */
     4, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    12,        /* movi_d */
-    8, /* ldr_d */
-    20,        /* ldi_d */
-    8, /* ldxr_d */
-    24,        /* ldxi_d */
-    8, /* str_d */
-    20,        /* sti_d */
-    8, /* stxr_d */
-    24,        /* stxi_d */
+    16,        /* movi_d */
+    4, /* ldr_d */
+    16,        /* ldi_d */
+    4, /* ldxr_d */
+    16,        /* ldxi_d */
+    4, /* str_d */
+    16,        /* sti_d */
+    4, /* stxr_d */
+    16,        /* stxi_d */
     8, /* bltr_d */
     16,        /* blti_d */
     8, /* bler_d */
     16,        /* blei_d */
     8, /* beqr_d */
-    20,        /* beqi_d */
+    24,        /* beqi_d */
     8, /* bger_d */
     16,        /* bgei_d */
     8, /* bgtr_d */
     8, /* qlshi */
     52,        /* qlshr_u */
     8, /* qlshi_u */
-    52,        /* qrshr */
+    44,        /* qrshr */
     8, /* qrshi */
-    52,        /* qrshr_u */
+    48,        /* qrshr_u */
     8, /* qrshi_u */
     24,        /* unldr */
     44,        /* unldi */
     44,        /* unldi_u */
     20,        /* unstr */
     56,        /* unsti */
-    8, /* unldr_x */
-    20,        /* unldi_x */
-    8, /* unstr_x */
-    20,        /* unsti_x */
+    4, /* unldr_x */
+    16,        /* unldi_x */
+    4, /* unstr_x */
+    16,        /* unsti_x */
     4, /* fmar_f */
     0, /* fmai_f */
     4, /* fmsr_f */
     16,        /* hmuli */
     4, /* hmulr_u */
     16,        /* hmuli_u */
+    8, /* ldxbr_c */
+    4, /* ldxbi_c */
+    8, /* ldxar_c */
+    4, /* ldxai_c */
+    8, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    4, /* ldxai_uc */
+    8, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    4, /* ldxai_s */
+    8, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    4, /* ldxai_us */
+    8, /* ldxbr_i */
+    4, /* ldxbi_i */
+    8, /* ldxar_i */
+    4, /* ldxai_i */
+    8, /* ldxbr_ui */
+    4, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    4, /* ldxai_ui */
+    8, /* ldxbr_l */
+    4, /* ldxbi_l */
+    8, /* ldxar_l */
+    4, /* ldxai_l */
+    8, /* ldxbr_f */
+    4, /* ldxbi_f */
+    8, /* ldxar_f */
+    4, /* ldxai_f */
+    8, /* ldxbr_d */
+    4, /* ldxbi_d */
+    8, /* ldxar_d */
+    4, /* ldxai_d */
+    8, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    4, /* stxai_c */
+    8, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    4, /* stxai_s */
+    8, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    4, /* stxai_i */
+    8, /* stxbr_l */
+    4, /* stxbi_l */
+    8, /* stxar_l */
+    4, /* stxai_l */
+    8, /* stxbr_f */
+    4, /* stxbi_f */
+    8, /* stxar_f */
+    4, /* stxai_f */
+    8, /* stxbr_d */
+    4, /* stxbi_d */
+    8, /* stxar_d */
+    4, /* stxai_d */
 #  else        /* PACKED_STACK */
 
-#define JIT_INSTR_MAX 120
+#define JIT_INSTR_MAX 84
     0, /* data */
     0, /* live */
     12,        /* align */
     0, /* #name */
     0, /* #note */
     0, /* label */
-    120,       /* prolog */
+    84,        /* prolog */
     0, /* ellipsis */
     0, /* va_push */
     0, /* allocai */
     0, /* putargi_l */
     44,        /* va_start */
     48,        /* va_arg */
-    56,        /* va_arg_d */
+    48,        /* va_arg_d */
     0, /* va_end */
     4, /* addr */
     20,        /* addi */
     16,        /* ldi_ui */
     4, /* ldr_l */
     16,        /* ldi_l */
-    8, /* ldxr_c */
-    20,        /* ldxi_c */
+    4, /* ldxr_c */
+    16,        /* ldxi_c */
     4, /* ldxr_uc */
     20,        /* ldxi_uc */
     4, /* ldxr_s */
     0, /* retval_i */
     0, /* retval_ui */
     0, /* retval_l */
-    96,        /* epilog */
+    64,        /* epilog */
     0, /* arg_f */
     0, /* getarg_f */
     0, /* putargr_f */
     4, /* extr_d_f */
     4, /* movr_f */
     8, /* movi_f */
-    8, /* ldr_f */
-    20,        /* ldi_f */
-    8, /* ldxr_f */
-    24,        /* ldxi_f */
-    8, /* str_f */
-    20,        /* sti_f */
-    8, /* stxr_f */
-    24,        /* stxi_f */
+    4, /* ldr_f */
+    16,        /* ldi_f */
+    4, /* ldxr_f */
+    16,        /* ldxi_f */
+    4, /* str_f */
+    16,        /* sti_f */
+    4, /* stxr_f */
+    16,        /* stxi_f */
     8, /* bltr_f */
     16,        /* blti_f */
     8, /* bler_f */
     4, /* extr_d */
     4, /* extr_f_d */
     4, /* movr_d */
-    12,        /* movi_d */
-    8, /* ldr_d */
-    20,        /* ldi_d */
-    8, /* ldxr_d */
-    24,        /* ldxi_d */
-    8, /* str_d */
-    20,        /* sti_d */
-    8, /* stxr_d */
-    24,        /* stxi_d */
+    16,        /* movi_d */
+    4, /* ldr_d */
+    16,        /* ldi_d */
+    4, /* ldxr_d */
+    16,        /* ldxi_d */
+    4, /* str_d */
+    16,        /* sti_d */
+    4, /* stxr_d */
+    16,        /* stxi_d */
     8, /* bltr_d */
     16,        /* blti_d */
     8, /* bler_d */
     16,        /* blei_d */
     8, /* beqr_d */
-    20,        /* beqi_d */
+    24,        /* beqi_d */
     8, /* bger_d */
     16,        /* bgei_d */
     8, /* bgtr_d */
     8, /* qlshi */
     52,        /* qlshr_u */
     8, /* qlshi_u */
-    52,        /* qrshr */
+    44,        /* qrshr */
     8, /* qrshi */
-    52,        /* qrshr_u */
+    48,        /* qrshr_u */
     8, /* qrshi_u */
     24,        /* unldr */
     44,        /* unldi */
     44,        /* unldi_u */
     20,        /* unstr */
     56,        /* unsti */
-    8, /* unldr_x */
-    20,        /* unldi_x */
-    8, /* unstr_x */
-    20,        /* unsti_x */
+    4, /* unldr_x */
+    16,        /* unldi_x */
+    4, /* unstr_x */
+    16,        /* unsti_x */
     4, /* fmar_f */
     0, /* fmai_f */
     4, /* fmsr_f */
     16,        /* hmuli */
     4, /* hmulr_u */
     16,        /* hmuli_u */
+    8, /* ldxbr_c */
+    4, /* ldxbi_c */
+    8, /* ldxar_c */
+    4, /* ldxai_c */
+    8, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    4, /* ldxai_uc */
+    8, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    4, /* ldxai_s */
+    8, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    4, /* ldxai_us */
+    8, /* ldxbr_i */
+    4, /* ldxbi_i */
+    8, /* ldxar_i */
+    4, /* ldxai_i */
+    8, /* ldxbr_ui */
+    4, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    4, /* ldxai_ui */
+    8, /* ldxbr_l */
+    4, /* ldxbi_l */
+    8, /* ldxar_l */
+    4, /* ldxai_l */
+    8, /* ldxbr_f */
+    4, /* ldxbi_f */
+    8, /* ldxar_f */
+    4, /* ldxai_f */
+    8, /* ldxbr_d */
+    4, /* ldxbi_d */
+    8, /* ldxar_d */
+    4, /* ldxai_d */
+    8, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    4, /* stxai_c */
+    8, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    4, /* stxai_s */
+    8, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    4, /* stxai_i */
+    8, /* stxbr_l */
+    4, /* stxbi_l */
+    8, /* stxar_l */
+    4, /* stxai_l */
+    8, /* stxbr_f */
+    4, /* stxbi_f */
+    8, /* stxar_f */
+    4, /* stxai_f */
+    8, /* stxbr_d */
+    4, /* stxbi_d */
+    8, /* stxar_d */
+    4, /* stxai_d */
 #  endif
 #endif /* __WORDSIZE */
index bc78800..0c5a40c 100644 (file)
@@ -1292,6 +1292,24 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1522,6 +1540,24 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1544,6 +1580,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
@@ -2119,6 +2167,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_brr
 #undef case_wrr
 #undef case_rrw
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index fd39c0d..a5a886b 100644 (file)
     8, /* qlshi */
     40,        /* qlshr_u */
     8, /* qlshi_u */
-    40,        /* qrshr */
+    44,        /* qrshr */
     8, /* qrshi */
     40,        /* qrshr_u */
     8, /* qrshi_u */
     0, /* fnmai_d */
     20,        /* fnmsr_d */
     0, /* fnmsi_d */
-    36, /* hmulr */
-    60, /* hmuli */
+    32,        /* hmulr */
+    56,        /* hmuli */
     4, /* hmulr_u */
-    28, /* hmuli_u */
+    28,        /* hmuli_u */
+    12,        /* ldxbr_c */
+    12,        /* ldxbi_c */
+    12,        /* ldxar_c */
+    12,        /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    12,        /* ldxbr_s */
+    12,        /* ldxbi_s */
+    12,        /* ldxar_s */
+    12,        /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    12,        /* ldxbr_ui */
+    12,        /* ldxbi_ui */
+    12,        /* ldxar_ui */
+    12,        /* ldxai_ui */
+    8, /* ldxbr_l */
+    8, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    8, /* stxbr_l */
+    8, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index 69bf397..a67421d 100644 (file)
@@ -917,6 +917,26 @@ _emit_code(jit_state_t *_jit)
                              rn(node->v.q.h), rn(node->w.w));          \
            case jit_code_##name##i##type:                              \
                break;
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1111,6 +1131,24 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1133,6 +1171,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
@@ -1720,6 +1770,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_rrrr
 #undef case_rrf
 #undef case_rrw
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index 149db9a..73004ce 100644 (file)
@@ -269,7 +269,9 @@ extern unsigned     __aeabi_uidivmod(unsigned, unsigned);
 #  define ARM_BLI                      0x0b000000
 #  define THUMB2_BLI                   0xf000d000
 /* ldr/str */
-#  define ARM_P                                0x00800000 /* positive offset */
+#  define ARM_U                                0x00800000 /* positive offset */
+#  define ARM_P                                0x01000000 /* index */
+#  define ARM_W                                0x00200000 /* writeback */
 #  define THUMB2_P                     0x00000400
 #  define THUMB2_U                     0x00000200
 #  define THUMB2_W                     0x00000100
@@ -338,9 +340,9 @@ extern unsigned     __aeabi_uidivmod(unsigned, unsigned);
 /* ldm/stm */
 #  define ARM_M                                0x08000000
 #  define ARM_M_L                      0x00100000 /* load; store if not set */
-#  define ARM_M_I                      0x00800000 /* inc; dec if not set */
-#  define ARM_M_B                      0x01000000 /* before; after if not set */
-#  define ARM_M_U                      0x00200000 /* update Rn */
+#  define ARM_M_U                      0x00800000 /* inc; dec if not set */
+#  define ARM_M_P                      0x01000000 /* before; after if not set */
+#  define ARM_M_W                      0x00200000 /* update Rn */
 #  define THUMB2_LDM_W                 0x00200000
 #  define THUMB2_LDM_P                 0x00008000
 #  define THUMB2_LDM_M                 0x00004000
@@ -717,81 +719,131 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int);
 #  define CC_BLI(cc,im)                        cb(cc,ARM_BLI,im)
 #  define BLI(im)                      CC_BLI(ARM_CC_AL,im)
 #  define T2_BLI(im)                   tb(THUMB2_BLI,im)
-#  define CC_LDRSB(cc,rt,rn,rm)                corrr(cc,ARM_LDRSB|ARM_P,rn,rt,rm)
+#  define CC_LDRSB(cc,rt,rn,rm)                corrr(cc,ARM_LDRSB|ARM_U,rn,rt,rm)
 #  define LDRSB(rt,rn,rm)              CC_LDRSB(ARM_CC_AL,rt,rn,rm)
+#  define LDRSB_B(rt,rn,rm)            corrr(ARM_CC_AL,ARM_LDRSB|ARM_P|ARM_U|ARM_W,rn,rt,rm)
+#  define LDRSB_A(rt,rn,rm)            corrr(ARM_CC_AL,ARM_LDRSB|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_LDRSB(rt,rn,rm)           is(THUMB_LDRSB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDRSB(rt,rn,rm)           torxr(THUMB2_LDRSB,rn,rt,rm)
 #  define CC_LDRSBN(cc,rt,rn,rm)       corrr(cc,ARM_LDRSB,rn,rt,rm)
 #  define LDRSBN(rt,rn,rm)             CC_LDRSBN(ARM_CC_AL,rt,rn,rm)
-#  define CC_LDRSBI(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI|ARM_P,rn,rt,im)
+#  define CC_LDRSBI(cc,rt,rn,im)       corri8(cc,ARM_LDRSBI|ARM_U,rn,rt,im)
 #  define LDRSBI(rt,rn,im)             CC_LDRSBI(ARM_CC_AL,rt,rn,im)
+#  define LDRSBI_B(rt,rn,im)           corri8(ARM_CC_AL,ARM_LDRSBI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define LDRSBI_A(rt,rn,im)           corri8(ARM_CC_AL,ARM_LDRSBI|ARM_U|ARM_W,rn,rt,im)
 #  define T2_LDRSBI(rt,rn,im)          torri8(THUMB2_LDRSBI|THUMB2_U,rn,rt,im)
 #  define T2_LDRSBWI(rt,rn,im)         torri12(THUMB2_LDRSBWI,rn,rt,im)
+#  define T2_LDRSBI_B(rt,rn,im)                torri8(THUMB2_LDRSBI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im)
+#  define T2_LDRSBI_A(rt,rn,im)                torri8(THUMB2_LDRSBI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_LDRSBIN(cc,rt,rn,im)      corri8(cc,ARM_LDRSBI,rn,rt,im)
 #  define LDRSBIN(rt,rn,im)            CC_LDRSBIN(ARM_CC_AL,rt,rn,im)
+#  define LDRSBIN_B(rt,rn,im)          corri8(ARM_CC_AL,ARM_LDRSBI|ARM_P|ARM_W,rn,rt,im)
+#  define LDRSBIN_A(rt,rn,im)          corri8(ARM_CC_AL,ARM_LDRSBI|ARM_W,rn,rt,im)
 #  define T2_LDRSBIN(rt,rn,im)         torri8(THUMB2_LDRSBI,rn,rt,im)
-#  define CC_LDRB(cc,rt,rn,rm)         corrr(cc,ARM_LDRB|ARM_P,rn,rt,rm)
+#  define T2_LDRSBIN_B(rt,rn,im)       torri8(THUMB2_LDRSBI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_LDRSBIN_A(rt,rn,im)       torri8(THUMB2_LDRSBI|THUMB2_W,rn,rt,im)
+#  define CC_LDRB(cc,rt,rn,rm)         corrr(cc,ARM_LDRB|ARM_U,rn,rt,rm)
 #  define LDRB(rt,rn,rm)               CC_LDRB(ARM_CC_AL,rt,rn,rm)
+#  define LDRB_B(rt,rn,rm)             corrr(ARM_CC_AL,ARM_LDRB|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define LDRB_A(rt,rn,rm)             corrr(ARM_CC_AL,ARM_LDRB|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_LDRB(rt,rn,rm)            is(THUMB_LDRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDRB(rt,rn,rm)            torxr(THUMB2_LDRB,rn,rt,rm)
 #  define CC_LDRBN(cc,rt,rn,rm)                corrr(cc,ARM_LDRB,rn,rt,rm)
 #  define LDRBN(rt,rn,rm)              CC_LDRBN(ARM_CC_AL,rt,rn,rm)
-#  define CC_LDRBI(cc,rt,rn,im)                corri(cc,ARM_LDRBI|ARM_P,rn,rt,im)
+#  define CC_LDRBI(cc,rt,rn,im)                corri(cc,ARM_LDRBI|ARM_U,rn,rt,im)
 #  define LDRBI(rt,rn,im)              CC_LDRBI(ARM_CC_AL,rt,rn,im)
+#  define LDRBI_B(rt,rn,im)            corri(ARM_CC_AL,ARM_LDRBI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define LDRBI_A(rt,rn,im)            corri(ARM_CC_AL,ARM_LDRBI|ARM_U|ARM_W,rn,rt,im)
 #  define T1_LDRBI(rt,rn,im)           is(THUMB_LDRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDRBI(rt,rn,im)           torri8(THUMB2_LDRBI|THUMB2_U,rn,rt,im)
 #  define T2_LDRBWI(rt,rn,im)          torri12(THUMB2_LDRBWI,rn,rt,im)
+#  define T2_LDRBI_B(rt,rn,im)         torri8(THUMB2_LDRBI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im)
+#  define T2_LDRBI_A(rt,rn,im)         torri8(THUMB2_LDRBI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_LDRBIN(cc,rt,rn,im)       corri(cc,ARM_LDRBI,rn,rt,im)
 #  define LDRBIN(rt,rn,im)             CC_LDRBIN(ARM_CC_AL,rt,rn,im)
+#  define LDRBIN_B(rt,rn,im)           corri(ARM_CC_AL,ARM_LDRBI|ARM_P|ARM_W,rn,rt,im)
+#  define LDRBIN_A(rt,rn,im)           corri(ARM_CC_AL,ARM_LDRBI|ARM_W,rn,rt,im)
 #  define T2_LDRBIN(rt,rn,im)          torri8(THUMB2_LDRBI,rn,rt,im)
-#  define CC_LDRSH(cc,rt,rn,rm)                corrr(cc,ARM_LDRSH|ARM_P,rn,rt,rm)
+#  define T2_LDRBIN_B(rt,rn,im)                torri8(THUMB2_LDRBI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_LDRBIN_A(rt,rn,im)                torri8(THUMB2_LDRBI|THUMB2_W,rn,rt,im)
+#  define CC_LDRSH(cc,rt,rn,rm)                corrr(cc,ARM_LDRSH|ARM_U,rn,rt,rm)
 #  define LDRSH(rt,rn,rm)              CC_LDRSH(ARM_CC_AL,rt,rn,rm)
+#  define LDRSH_B(rt,rn,rm)            corrr(ARM_CC_AL,ARM_LDRSH|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define LDRSH_A(rt,rn,rm)            corrr(ARM_CC_AL,ARM_LDRSH|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_LDRSH(rt,rn,rm)           is(THUMB_LDRSH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDRSH(rt,rn,rm)           torxr(THUMB2_LDRSH,rn,rt,rm)
 #  define CC_LDRSHN(cc,rt,rn,rm)       corrr(cc,ARM_LDRSH,rn,rt,rm)
 #  define LDRSHN(rt,rn,rm)             CC_LDRSHN(ARM_CC_AL,rt,rn,rm)
-#  define CC_LDRSHI(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI|ARM_P,rn,rt,im)
+#  define CC_LDRSHI(cc,rt,rn,im)       corri8(cc,ARM_LDRSHI|ARM_U,rn,rt,im)
 #  define LDRSHI(rt,rn,im)             CC_LDRSHI(ARM_CC_AL,rt,rn,im)
+#  define LDRSHI_B(rt,rn,im)           corri8(ARM_CC_AL,ARM_LDRSHI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define LDRSHI_A(rt,rn,im)           corri8(ARM_CC_AL,ARM_LDRSHI|ARM_U|ARM_W,rn,rt,im)
 #  define T2_LDRSHI(rt,rn,im)          torri8(THUMB2_LDRSHI|THUMB2_U,rn,rt,im)
 #  define T2_LDRSHWI(rt,rn,im)         torri12(THUMB2_LDRSHWI,rn,rt,im)
+#  define T2_LDRSHI_B(rt,rn,im)                torri8(THUMB2_LDRSHI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im) /* halfword opcode, as in T2_LDRSHI */
+#  define T2_LDRSHI_A(rt,rn,im)                torri8(THUMB2_LDRSHI|THUMB2_U|THUMB2_W,rn,rt,im) /* halfword opcode, as in T2_LDRSHI */
 #  define CC_LDRSHIN(cc,rt,rn,im)      corri8(cc,ARM_LDRSHI,rn,rt,im)
 #  define LDRSHIN(rt,rn,im)            CC_LDRSHIN(ARM_CC_AL,rt,rn,im)
+#  define LDRSHIN_B(rt,rn,im)          corri8(ARM_CC_AL,ARM_LDRSHI|ARM_P|ARM_W,rn,rt,im)
+#  define LDRSHIN_A(rt,rn,im)          corri8(ARM_CC_AL,ARM_LDRSHI|ARM_W,rn,rt,im)
 #  define T2_LDRSHIN(rt,rn,im)         torri8(THUMB2_LDRSHI,rn,rt,im)
-#  define CC_LDRH(cc,rt,rn,rm)         corrr(cc,ARM_LDRH|ARM_P,rn,rt,rm)
+#  define T2_LDRSHIN_B(rt,rn,im)       torri8(THUMB2_LDRSHI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_LDRSHIN_A(rt,rn,im)       torri8(THUMB2_LDRSHI|THUMB2_W,rn,rt,im)
+#  define CC_LDRH(cc,rt,rn,rm)         corrr(cc,ARM_LDRH|ARM_U,rn,rt,rm)
 #  define LDRH(rt,rn,rm)               CC_LDRH(ARM_CC_AL,rt,rn,rm)
+#  define LDRH_B(rt,rn,rm)             corrr(ARM_CC_AL,ARM_LDRH|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define LDRH_A(rt,rn,rm)             corrr(ARM_CC_AL,ARM_LDRH|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_LDRH(rt,rn,rm)            is(THUMB_LDRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDRH(rt,rn,rm)            torxr(THUMB2_LDRH,rn,rt,rm)
 #  define CC_LDRHN(cc,rt,rn,rm)                corrr(cc,ARM_LDRH,rn,rt,rm)
 #  define LDRHN(rt,rn,rm)              CC_LDRHN(ARM_CC_AL,rt,rn,rm)
-#  define CC_LDRHI(cc,rt,rn,im)                corri8(cc,ARM_LDRHI|ARM_P,rn,rt,im)
+#  define CC_LDRHI(cc,rt,rn,im)                corri8(cc,ARM_LDRHI|ARM_U,rn,rt,im)
 #  define LDRHI(rt,rn,im)              CC_LDRHI(ARM_CC_AL,rt,rn,im)
+#  define LDRHI_B(rt,rn,im)            corri8(ARM_CC_AL,ARM_LDRHI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define LDRHI_A(rt,rn,im)            corri8(ARM_CC_AL,ARM_LDRHI|ARM_U|ARM_W,rn,rt,im)
 #  define T1_LDRHI(rt,rn,im)           is(THUMB_LDRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDRHI(rt,rn,im)           torri8(THUMB2_LDRHI|THUMB2_U,rn,rt,im)
 #  define T2_LDRHWI(rt,rn,im)          torri12(THUMB2_LDRHWI,rn,rt,im)
+#  define T2_LDRHI_B(rt,rn,im)         torri8(THUMB2_LDRHI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im)
+#  define T2_LDRHI_A(rt,rn,im)         torri8(THUMB2_LDRHI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_LDRHIN(cc,rt,rn,im)       corri8(cc,ARM_LDRHI,rn,rt,im)
 #  define LDRHIN(rt,rn,im)             CC_LDRHIN(ARM_CC_AL,rt,rn,im)
+#  define LDRHIN_B(rt,rn,im)           corri8(ARM_CC_AL,ARM_LDRHI|ARM_P|ARM_W,rn,rt,im)
+#  define LDRHIN_A(rt,rn,im)           corri8(ARM_CC_AL,ARM_LDRHI|ARM_W,rn,rt,im)
 #  define T2_LDRHIN(rt,rn,im)          torri8(THUMB2_LDRHI,rn,rt,im)
-#  define CC_LDR(cc,rt,rn,rm)          corrr(cc,ARM_LDR|ARM_P,rn,rt,rm)
+#  define T2_LDRHIN_B(rt,rn,im)                torri8(THUMB2_LDRHI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_LDRHIN_A(rt,rn,im)                torri8(THUMB2_LDRHI|THUMB2_W,rn,rt,im)
+#  define CC_LDR(cc,rt,rn,rm)          corrr(cc,ARM_LDR|ARM_U,rn,rt,rm)
 #  define LDR(rt,rn,rm)                        CC_LDR(ARM_CC_AL,rt,rn,rm)
+#  define LDR_B(rt,rn,rm)              corrr(ARM_CC_AL,ARM_LDR|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define LDR_A(rt,rn,rm)              corrr(ARM_CC_AL,ARM_LDR|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_LDR(rt,rn,rm)             is(THUMB_LDR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_LDR(rt,rn,rm)             torxr(THUMB2_LDR,rn,rt,rm)
 #  define CC_LDRN(cc,rt,rn,rm)         corrr(cc,ARM_LDR,rn,rt,rm)
 #  define LDRN(rt,rn,rm)               CC_LDRN(ARM_CC_AL,rt,rn,rm)
-#  define CC_LDRI(cc,rt,rn,im)         corri(cc,ARM_LDRI|ARM_P,rn,rt,im)
+#  define CC_LDRI(cc,rt,rn,im)         corri(cc,ARM_LDRI|ARM_U,rn,rt,im)
 #  define LDRI(rt,rn,im)               CC_LDRI(ARM_CC_AL,rt,rn,im)
+#  define LDRI_B(rt,rn,im)             corri(ARM_CC_AL,ARM_LDRI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define LDRI_A(rt,rn,im)             corri(ARM_CC_AL,ARM_LDRI|ARM_U|ARM_W,rn,rt,im)
 #  define T1_LDRI(rt,rn,im)            is(THUMB_LDRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T1_LDRISP(rt,im)             is(THUMB_LDRISP|(_u3(rt)<<8)|_u8(im))
 #  define T2_LDRI(rt,rn,im)            torri8(THUMB2_LDRI|THUMB2_U,rn,rt,im)
 #  define T2_LDRWI(rt,rn,im)           torri12(THUMB2_LDRWI,rn,rt,im)
+#  define T2_LDRI_B(rt,rn,im)          torri8(THUMB2_LDRI|THUMB2_P|THUMB2_U|THUMB2_W,rn,rt,im)
+#  define T2_LDRI_A(rt,rn,im)          torri8(THUMB2_LDRI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_LDRIN(cc,rt,rn,im)                corri(cc,ARM_LDRI,rn,rt,im)
 #  define LDRIN(rt,rn,im)              CC_LDRIN(ARM_CC_AL,rt,rn,im)
+#  define LDRIN_B(rt,rn,im)            corri(ARM_CC_AL,ARM_LDRI|ARM_P|ARM_W,rn,rt,im)
+#  define LDRIN_A(rt,rn,im)            corri(ARM_CC_AL,ARM_LDRI|ARM_W,rn,rt,im)
 #  define T2_LDRIN(rt,rn,im)           torri8(THUMB2_LDRI,rn,rt,im)
-#  define CC_LDRD(cc,rt,rn,rm)         corrr(cc,ARM_LDRD|ARM_P,rn,rt,rm)
+#  define T2_LDRIN_B(rt,rn,im)         torri8(THUMB2_LDRI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_LDRIN_A(rt,rn,im)         torri8(THUMB2_LDRI|THUMB2_W,rn,rt,im)
+#  define CC_LDRD(cc,rt,rn,rm)         corrr(cc,ARM_LDRD|ARM_U,rn,rt,rm)
 #  define LDRD(rt,rn,rm)               CC_LDRD(ARM_CC_AL,rt,rn,rm)
-#  define T2_LDRDI(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
+#  define T2_LDRDI(rt,rt2,rn,im)       torrri8(THUMB2_LDRDI|ARM_U,rn,rt,rt2,im)
 #  define CC_LDRDN(cc,rt,rn,rm)                corrr(cc,ARM_LDRD,rn,rt,rm)
 #  define LDRDN(rd,rn,rm)              CC_LDRDN(ARM_CC_AL,rt,rn,rm)
-#  define CC_LDRDI(cc,rt,rn,im)                corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
+#  define CC_LDRDI(cc,rt,rn,im)                corri8(cc,ARM_LDRDI|ARM_U,rn,rt,im)
 #  define LDRDI(rt,rn,im)              CC_LDRDI(ARM_CC_AL,rt,rn,im)
 #  define CC_LDRDIN(cc,rt,rn,im)       corri8(cc,ARM_LDRDI,rn,rt,im)
 #  define LDRDIN(rt,rn,im)             CC_LDRDIN(ARM_CC_AL,rt,rn,im)
@@ -799,103 +851,133 @@ static void _corrlw(jit_state_t*,int,int,int,int,int,int);
 #  define CC_LDREX(cc,rt,rn)           corrrr(cc,ARM_LDREX,rn,rt,0xf,0xf)
 #  define LDREX(rt,rn)                 CC_LDREX(ARM_CC_AL,rt,rn)
 #  define T2_LDREX(rt,rn,im)           torrri8(THUMB2_LDREX,rn,rt,0xf,im)
-#  define CC_STRB(cc,rt,rn,rm)         corrr(cc,ARM_STRB|ARM_P,rn,rt,rm)
+#  define CC_STRB(cc,rt,rn,rm)         corrr(cc,ARM_STRB|ARM_U,rn,rt,rm)
 #  define STRB(rt,rn,rm)               CC_STRB(ARM_CC_AL,rt,rn,rm)
+#  define STRB_B(rt,rn,rm)             corrr(ARM_CC_AL,ARM_STRB|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define STRB_A(rt,rn,rm)             corrr(ARM_CC_AL,ARM_STRB|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_STRB(rt,rn,rm)            is(THUMB_STRB|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_STRB(rt,rn,rm)            torxr(THUMB2_STRB,rn,rt,rm)
 #  define CC_STRBN(cc,rt,rn,rm)                corrr(cc,ARM_STRB,rn,rt,rm)
 #  define STRBN(rt,rn,rm)              CC_STRBN(ARM_CC_AL,rt,rn,rm)
-#  define CC_STRBI(cc,rt,rn,im)                corri(cc,ARM_STRBI|ARM_P,rn,rt,im)
+#  define CC_STRBI(cc,rt,rn,im)                corri(cc,ARM_STRBI|ARM_U,rn,rt,im)
 #  define STRBI(rt,rn,im)              CC_STRBI(ARM_CC_AL,rt,rn,im)
+#  define STRBI_B(rt,rn,im)            corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define STRBI_A(rt,rn,im)            corri(ARM_CC_AL,ARM_STRBI|ARM_U|ARM_W,rn,rt,im)
 #  define T1_STRBI(rt,rn,im)           is(THUMB_STRBI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_STRBI(rt,rn,im)           torri8(THUMB2_STRBI|THUMB2_U,rn,rt,im)
 #  define T2_STRBWI(rt,rn,im)          torri12(THUMB2_STRBWI,rn,rt,im)
+#  define T2_STRBI_B(rt,rn,im)         torri8(THUMB2_STRBI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_STRBI_A(rt,rn,im)         torri8(THUMB2_STRBI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_STRBIN(cc,rt,rn,im)       corri(cc,ARM_STRBI,rn,rt,im)
 #  define STRBIN(rt,rn,im)             CC_STRBIN(ARM_CC_AL,rt,rn,im)
+#  define STRBIN_B(rt,rn,im)           corri(ARM_CC_AL,ARM_STRBI|ARM_P|ARM_W,rn,rt,im)
+#  define STRBIN_A(rt,rn,im)           corri(ARM_CC_AL,ARM_STRBI|ARM_W,rn,rt,im)
 #  define T2_STRBIN(rt,rn,im)          torri8(THUMB2_STRBI,rn,rt,im)
-#  define CC_STRH(cc,rt,rn,rm)         corrr(cc,ARM_STRH|ARM_P,rn,rt,rm)
+#  define T2_STRBIN_B(rt,rn,im)                torri8(THUMB2_STRBI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_STRBIN_A(rt,rn,im)                torri8(THUMB2_STRBI|THUMB2_W,rn,rt,im)
+#  define CC_STRH(cc,rt,rn,rm)         corrr(cc,ARM_STRH|ARM_U,rn,rt,rm)
 #  define STRH(rt,rn,rm)               CC_STRH(ARM_CC_AL,rt,rn,rm)
+#  define STRH_B(rt,rn,rm)             corrr(ARM_CC_AL,ARM_STRH|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define STRH_A(rt,rn,rm)             corrr(ARM_CC_AL,ARM_STRH|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_STRH(rt,rn,rm)            is(THUMB_STRH|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_STRH(rt,rn,rm)            torxr(THUMB2_STRH,rn,rt,rm)
 #  define CC_STRHN(cc,rt,rn,rm)                corrr(cc,ARM_STRH,rn,rt,rm)
 #  define STRHN(rt,rn,rm)              CC_STRHN(ARM_CC_AL,rt,rn,rm)
-#  define CC_STRHI(cc,rt,rn,im)                corri8(cc,ARM_STRHI|ARM_P,rn,rt,im)
+#  define CC_STRHI(cc,rt,rn,im)                corri8(cc,ARM_STRHI|ARM_U,rn,rt,im)
 #  define STRHI(rt,rn,im)              CC_STRHI(ARM_CC_AL,rt,rn,im)
+#  define STRHI_B(rt,rn,im)            corri8(ARM_CC_AL,ARM_STRHI|ARM_P|ARM_U|ARM_W,rn,rt,im) /* same emitter as CC_STRHI and STRHIN_B */
+#  define STRHI_A(rt,rn,im)            corri8(ARM_CC_AL,ARM_STRHI|ARM_U|ARM_W,rn,rt,im) /* same emitter as CC_STRHI and STRHIN_A */
 #  define T1_STRHI(rt,rn,im)           is(THUMB_STRHI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_STRHI(rt,rn,im)           torri8(THUMB2_STRHI|THUMB2_U,rn,rt,im)
 #  define T2_STRHWI(rt,rn,im)          torri12(THUMB2_STRHWI,rn,rt,im)
+#  define T2_STRHI_B(rt,rn,im)         torri8(THUMB2_STRHI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_STRHI_A(rt,rn,im)         torri8(THUMB2_STRHI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_STRHIN(cc,rt,rn,im)       corri8(cc,ARM_STRHI,rn,rt,im)
 #  define STRHIN(rt,rn,im)             CC_STRHIN(ARM_CC_AL,rt,rn,im)
+#  define STRHIN_B(rt,rn,im)           corri8(ARM_CC_AL,ARM_STRHI|ARM_P|ARM_W,rn,rt,im)
+#  define STRHIN_A(rt,rn,im)           corri8(ARM_CC_AL,ARM_STRHI|ARM_W,rn,rt,im)
 #  define T2_STRHIN(rt,rn,im)          torri8(THUMB2_STRHI,rn,rt,im)
-#  define CC_STR(cc,rt,rn,rm)          corrr(cc,ARM_STR|ARM_P,rn,rt,rm)
+#  define T2_STRHIN_B(rt,rn,im)                torri8(THUMB2_STRHI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_STRHIN_A(rt,rn,im)                torri8(THUMB2_STRHI|THUMB2_W,rn,rt,im)
+#  define CC_STR(cc,rt,rn,rm)          corrr(cc,ARM_STR|ARM_U,rn,rt,rm)
 #  define STR(rt,rn,rm)                        CC_STR(ARM_CC_AL,rt,rn,rm)
+#  define STR_B(rt,rn,rm)              corrr(ARM_CC_AL,ARM_STR|ARM_U|ARM_P|ARM_W,rn,rt,rm)
+#  define STR_A(rt,rn,rm)              corrr(ARM_CC_AL,ARM_STR|ARM_U|ARM_W,rn,rt,rm)
 #  define T1_STR(rt,rn,rm)             is(THUMB_STR|(_u3(rm)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T2_STR(rt,rn,rm)             torxr(THUMB2_STR,rn,rt,rm)
 #  define CC_STRN(cc,rt,rn,rm)         corrr(cc,ARM_STR,rn,rt,rm)
 #  define STRN(rt,rn,rm)               CC_STRN(ARM_CC_AL,rt,rn,rm)
-#  define CC_STRI(cc,rt,rn,im)         corri(cc,ARM_STRI|ARM_P,rn,rt,im)
+#  define CC_STRI(cc,rt,rn,im)         corri(cc,ARM_STRI|ARM_U,rn,rt,im)
 #  define STRI(rt,rn,im)               CC_STRI(ARM_CC_AL,rt,rn,im)
+#  define STRI_B(rt,rn,im)             corri(ARM_CC_AL,ARM_STRI|ARM_P|ARM_U|ARM_W,rn,rt,im)
+#  define STRI_A(rt,rn,im)             corri(ARM_CC_AL,ARM_STRI|ARM_U|ARM_W,rn,rt,im)
 #  define T1_STRI(rt,rn,im)            is(THUMB_STRI|(_u5(im)<<6)|(_u3(rn)<<3)|_u3(rt))
 #  define T1_STRISP(rt,im)             is(THUMB_STRISP|(_u3(rt)<<8)|(_u8(im)))
 #  define T2_STRI(rt,rn,im)            torri8(THUMB2_STRI|THUMB2_U,rn,rt,im)
 #  define T2_STRWI(rt,rn,im)           torri12(THUMB2_STRWI,rn,rt,im)
+#  define T2_STRI_B(rt,rn,im)          torri8(THUMB2_STRI|THUMB2_U|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_STRI_A(rt,rn,im)          torri8(THUMB2_STRI|THUMB2_U|THUMB2_W,rn,rt,im)
 #  define CC_STRIN(cc,rt,rn,im)                corri(cc,ARM_STRI,rn,rt,im)
 #  define STRIN(rt,rn,im)              CC_STRIN(ARM_CC_AL,rt,rn,im)
+#  define STRIN_B(rt,rn,im)            corri(ARM_CC_AL,ARM_STRI|ARM_P|ARM_W,rn,rt,im)
+#  define STRIN_A(rt,rn,im)            corri(ARM_CC_AL,ARM_STRI|ARM_W,rn,rt,im)
 #  define T2_STRIN(rt,rn,im)           torri8(THUMB2_STRI,rn,rt,im)
-#  define CC_STRD(cc,rt,rn,rm)         corrr(cc,ARM_STRD|ARM_P,rn,rt,rm)
+#  define T2_STRIN_B(rt,rn,im)         torri8(THUMB2_STRI|THUMB2_P|THUMB2_W,rn,rt,im)
+#  define T2_STRIN_A(rt,rn,im)         torri8(THUMB2_STRI|THUMB2_W,rn,rt,im)
+#  define CC_STRD(cc,rt,rn,rm)         corrr(cc,ARM_STRD|ARM_U,rn,rt,rm)
 #  define STRD(rt,rn,rm)               CC_STRD(ARM_CC_AL,rt,rn,rm)
 #  define CC_STRDN(cc,rt,rn,rm)                corrr(cc,ARM_STRD,rn,rt,rm)
 #  define STRDN(rt,rn,rm)              CC_STRDN(ARM_CC_AL,rt,rn,rm)
-#  define CC_STRDI(cc,rt,rn,im)                corri8(cc,ARM_STRDI|ARM_P,rn,rt,im)
+#  define CC_STRDI(cc,rt,rn,im)                corri8(cc,ARM_STRDI|ARM_U,rn,rt,im)
 #  define STRDI(rt,rn,im)              CC_STRDI(ARM_CC_AL,rt,rn,im)
-#  define T2_STRDI(rt,rt2,rn,im)       torrri8(THUMB2_STRDI|ARM_P,rn,rt,rt2,im)
+#  define T2_STRDI(rt,rt2,rn,im)       torrri8(THUMB2_STRDI|ARM_U,rn,rt,rt2,im)
 #  define CC_STRDIN(cc,rt,rn,im)       corri8(cc,ARM_STRDI,rn,rt,im)
 #  define STRDIN(rt,rn,im)             CC_STRDIN(ARM_CC_AL,rt,rn,im)
 #  define T2_STRDIN(rt,rt2,rn,im)      torrri8(THUMB2_STRDI,rn,rt,rt2,im)
 #  define CC_STREX(cc,rd,rt,rn)                corrrr(cc,ARM_STREX,rn,rd,0xf,rt)
 #  define STREX(rd,rt,rn)              CC_STREX(ARM_CC_AL,rd,rt,rn)
 #  define T2_STREX(rd,rt,rn,im)                torrri8(THUMB2_STREX,rn,rt,rd,im)
-#  define CC_LDMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I,rn,im)
+#  define CC_LDMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
 #  define LDMIA(rn,im)                 CC_LDMIA(ARM_CC_AL,rn,im)
 #  define CC_LDM(cc,rn,im)             CC_LDMIA(cc,rn,im)
 #  define LDM(rn,im)                   LDMIA(rn,im)
 #  define T1_LDMIA(rn,im)              is(THUMB_LDMIA|(_u3(rn)<<8)|im)
 #  define T2_LDMIA(rn,im)              torl(THUMB2_LDMIA,rn,im)
-#  define CC_LDMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_U,rn,im)
+#  define CC_LDMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_W,rn,im)
 #  define LDMIA_U(rn,im)               CC_LDMIA_U(ARM_CC_AL,rn,im)
 #  define LDM_U(r0,i0)                 LDMIA_U(r0,i0)
-#  define CC_LDMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B,rn,im)
+#  define CC_LDMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_P,rn,im)
 #  define LDMIB(rn,im)                 CC_LDMIB(ARM_CC_AL,rn,im)
-#  define CC_LDMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
+#  define CC_LDMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_U|ARM_M_P|ARM_M_W,rn,im)
 #  define LDMIB_U(rn,im)               CC_LDMIB_U(ARM_CC_AL,rn,im)
 #  define CC_LDMDA(cc,rn,im)           corl(cc,ARM_M|ARM_M_L,rn,im)
 #  define LDMDA(rn,im)                 CC_LDMDA(ARM_CC_AL,rn,im)
-#  define CC_LDMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_U,rn,im)
+#  define CC_LDMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_W,rn,im)
 #  define LDMDA_U(rn,im)               CC_LDMDA_U(ARM_CC_AL,rn,im)
-#  define CC_LDMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_B,rn,im)
+#  define CC_LDMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_L|ARM_M_P,rn,im)
 #  define LDMDB(rn,im)                 CC_LDMDB(ARM_CC_AL,rn,im)
 #  define T2_LDMDB(rn,im)              torl(THUMB2_LDMDB,rn,im)
-#  define CC_LDMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_B|ARM_M_U,rn,im)
+#  define CC_LDMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_L|ARM_M_P|ARM_M_W,rn,im)
 #  define LDMDB_U(rn,im)               CC_LDMDB_U(ARM_CC_AL,rn,im)
-#  define CC_STMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_I,rn,im)
+#  define CC_STMIA(cc,rn,im)           corl(cc,ARM_M|ARM_M_U,rn,im)
 #  define STMIA(rn,im)                 CC_STMIA(ARM_CC_AL,rn,im)
 #  define CC_STM(cc,rn,im)             CC_STMIA(cc,rn,im)
 #  define STM(rn,im)                   STMIA(rn,im)
-#  define CC_STMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_I|ARM_M_U,rn,im)
+#  define CC_STMIA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_U|ARM_M_W,rn,im)
 #  define STMIA_U(rn,im)               CC_STMIA_U(ARM_CC_AL,rn,im)
 #  define CC_STM_U(cc,rn,im)           CC_STMIA_U(cc,rn,im)
 #  define STM_U(rn,im)                 STMIA_U(rn,im)
-#  define CC_STMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_I|ARM_M_B,rn,im)
+#  define CC_STMIB(cc,rn,im)           corl(cc,ARM_M|ARM_M_U|ARM_M_P,rn,im)
 #  define STMIB(rn,im)                 CC_STMIB(ARM_CC_AL,rn,im)
-#  define CC_STMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_I|ARM_M_B|ARM_M_U,rn,im)
+#  define CC_STMIB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_U|ARM_M_P|ARM_M_W,rn,im)
 #  define STMIB_U(rn,im)               CC_STMIB_U(ARM_CC_AL,rn,im)
 #  define CC_STMDA(cc,rn,im)           corl(cc,ARM_M,rn,im)
 #  define STMDA(rn,im)                 CC_STMDA(ARM_CC_AL,rn,im)
-#  define CC_STMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_U,rn,im)
+#  define CC_STMDA_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_W,rn,im)
 #  define STMDA_U(rn,im)               CC_STMDA_U(ARM_CC_AL,rn,im)
-#  define CC_STMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_B,rn,im)
+#  define CC_STMDB(cc,rn,im)           corl(cc,ARM_M|ARM_M_P,rn,im)
 #  define STMDB(rn,im)                 CC_STMDB(ARM_CC_AL,rn,im)
-#  define CC_STMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_B|ARM_M_U,rn,im)
+#  define CC_STMDB_U(cc,rn,im)         corl(cc,ARM_M|ARM_M_P|ARM_M_W,rn,im)
 #  define STMDB_U(rn,im)               CC_STMDB_U(ARM_CC_AL,rn,im)
 #  define CC_PUSH(cc,im)               CC_STMDB_U(cc,_SP_REGNO,im)
 #  define PUSH(im)                     STMDB_U(_SP_REGNO,im)
@@ -1199,6 +1281,46 @@ static void _unldi(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
 static void _unldr_u(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define unldi_u(r0, i0, i1)          _unldi_u(_jit, r0, i0, i1)
 static void _unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#  define ldxbr_c(r0, r1, r2)          _ldxbr_c(_jit,r0, r1, r2)
+static void _ldxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxbi_c(r0, r1, i0)          _ldxbi_c(_jit, r0, r1, i0)
+static void _ldxbi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxbr_uc(r0, r1, r2)         _ldxbr_uc(_jit,r0, r1, r2)
+static void _ldxbr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxbi_uc(r0, r1, i0)         _ldxbi_uc(_jit, r0, r1, i0)
+static void _ldxbi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxbr_s(r0, r1, r2)          _ldxbr_s(_jit,r0, r1, r2)
+static void _ldxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxbi_s(r0, r1, i0)          _ldxbi_s(_jit, r0, r1, i0)
+static void _ldxbi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxbr_us(r0, r1, r2)         _ldxbr_us(_jit,r0, r1, r2)
+static void _ldxbr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxbi_us(r0, r1, i0)         _ldxbi_us(_jit, r0, r1, i0)
+static void _ldxbi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxbr_i(r0, r1, r2)          _ldxbr_i(_jit,r0, r1, r2)
+static void _ldxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxbi_i(r0, r1, i0)          _ldxbi_i(_jit, r0, r1, i0)
+static void _ldxbi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_c(r0, r1, r2)          _ldxar_c(_jit, r0, r1, r2)
+static void _ldxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxai_c(r0, r1, i0)          _ldxai_c(_jit, r0, r1, i0)
+static void _ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_uc(r0, r1, r2)         _ldxar_uc(_jit, r0, r1, r2)
+static void _ldxar_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxai_uc(r0, r1, i0)         _ldxai_uc(_jit, r0, r1, i0)
+static void _ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_s(r0, r1, r2)          _ldxar_s(_jit, r0, r1, r2)
+static void _ldxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxai_s(r0, r1, i0)          _ldxai_s(_jit, r0, r1, i0)
+static void _ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_us(r0, r1, r2)         _ldxar_us(_jit, r0, r1, r2)
+static void _ldxar_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxai_us(r0, r1, i0)         _ldxai_us(_jit, r0, r1, i0)
+static void _ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_i(r0, r1, r2)          _ldxar_i(_jit, r0, r1, r2)
+static void _ldxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define ldxai_i(r0, r1, i0)          _ldxai_i(_jit, r0, r1, i0)
+static void _ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define str_c(r0,r1)                 _str_c(_jit,r0,r1)
 static void _str_c(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
@@ -1227,6 +1349,30 @@ static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static void _unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #define unsti(i0, r0, i1)              _unsti(_jit, i0, r0, i1)
 static void _unsti(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+#  define stxbr_c(r0, r1, r2)          _stxbr_c(_jit, r0, r1, r2)
+static void _stxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxbi_c(i0, r0, r1)          _stxbi_c(_jit, i0, r0, r1)
+static void _stxbi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxbr_s(r0, r1, r2)          _stxbr_s(_jit, r0, r1, r2)
+static void _stxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxbi_s(i0, r0, r1)          _stxbi_s(_jit, i0, r0, r1)
+static void _stxbi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxbr_i(r0, r1, r2)          _stxbr_i(_jit, r0, r1, r2)
+static void _stxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxbi_i(i0, r0, r1)          _stxbi_i(_jit, i0, r0, r1)
+static void _stxbi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxar_c(r0, r1, r2)          _stxar_c(_jit, r0, r1, r2)
+static void _stxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxai_c(i0, r0, r1)          _stxai_c(_jit, i0, r0, r1)
+static void _stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxar_s(r0, r1, r2)          _stxar_s(_jit, r0, r1, r2)
+static void _stxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxai_s(i0, r0, r1)          _stxai_s(_jit, i0, r0, r1)
+static void _stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxar_i(r0, r1, r2)          _stxar_i(_jit, r0, r1, r2)
+static void _stxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define stxai_i(i0, r0, r1)          _stxai_i(_jit, i0, r0, r1)
+static void _stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
 static void _bswapr_us(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
@@ -3825,6 +3971,304 @@ _unldi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
        generic_unldi_u(r0, i0, i1);
 }
 
+static void
+_ldxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       LDRSB_B(r0, r1, r2);
+    else
+       generic_ldxbr_c(r0, r1, r2);
+}
+
+static void
+_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+           T2_LDRSBI_B(r0, r1, i0);
+           else
+               T2_LDRSBIN_B(r0, r1, -i0);
+       }
+       else {
+           if (i0 >= 0)
+               LDRSBI_B(r0, r1, i0);
+           else
+               LDRSBIN_B(r0, r1, -i0);
+       }
+    }
+    else
+       generic_ldxbi_c(r0, r1, i0);
+}
+
+static void
+_ldxbr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       LDRB_B(r0, r1, r2);
+    else
+       generic_ldxbr_uc(r0, r1, r2);
+}
+
+static void
+_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_thumb_p() && i0 >= -255 && i0 <= 255) {
+       if (i0 >= 0)
+           T2_LDRBI_B(r0, r1, i0);
+       else
+           T2_LDRBIN_B(r0, r1, -i0);
+    }
+    else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) {
+       if (i0 >= 0)
+           LDRBI_B(r0, r1, i0);
+       else
+           LDRBIN_B(r0, r1, -i0);
+    }
+    else
+       generic_ldxbi_uc(r0, r1, i0);
+}
+
+static void
+_ldxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       LDRSH_B(r0, r1, r2);
+    else
+       generic_ldxbr_s(r0, r1, r2);
+}
+
+static void
+_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+               T2_LDRSHI_B(r0, r1, i0);
+           else
+               T2_LDRSHIN_B(r0, r1, -i0);
+       }
+       else {
+           if (i0 >= 0)
+               LDRSHI_B(r0, r1, i0);
+           else
+               LDRSHIN_B(r0, r1, -i0);
+       }
+    }
+    else
+       generic_ldxbi_s(r0, r1, i0);
+}
+
+static void
+_ldxbr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       LDRH_B(r0, r1, r2);
+    else
+       generic_ldxbr_us(r0, r1, r2);
+}
+
+static void
+_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+           T2_LDRHI_B(r0, r1, i0);
+           else
+               T2_LDRHIN_B(r0, r1, -i0);
+       }
+       else {
+           if (i0 >= 0)
+               LDRHI_B(r0, r1, i0);
+           else
+               LDRHIN_B(r0, r1, -i0);
+       }
+    }
+    else
+       generic_ldxbi_us(r0, r1, i0);
+}
+
+static void
+_ldxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (!jit_thumb_p())
+       LDR_B(r0, r1, r2);
+    else
+       generic_ldxbr_i(r0, r1, r2);
+}
+
+static void
+_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_thumb_p() && i0 >= -255 && i0 <= 255) {
+       if (i0 >= 0)
+           T2_LDRI_B(r0, r1, i0);
+       else
+           T2_LDRIN_B(r0, r1, -i0);
+    }
+    else if (!jit_thumb_p() && i0 >= -4095 && i0 <= 4095) {
+       if (i0 >= 0)
+           LDRI_B(r0, r1, i0);
+       else
+           LDRIN_B(r0, r1, -i0);
+    }
+    else
+       generic_ldxbi_i(r0, r1, i0);
+}
+
+static void
+_ldxar_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_post_index_p() && !jit_thumb_p())
+       LDRSB_A(r0, r1, r2);
+    else
+       generic_ldxar_c(r0, r1, r2);
+}
+
+static void
+_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_post_index_p() && i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+               T2_LDRSBI_A(r0, r1, i0);
+           else
+               T2_LDRSBIN_A(r0, r1, -i0);
+       }
+       else {
+           if (i0 >= 0)
+               LDRSBI_A(r0, r1, i0);
+           else
+               LDRSBIN_A(r0, r1, -i0);
+       }
+    }
+    else
+       generic_ldxai_c(r0, r1, i0);
+}
+
+static void
+_ldxar_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_post_index_p() && !jit_thumb_p())
+       LDRB_A(r0, r1, r2);
+    else
+       generic_ldxar_uc(r0, r1, r2);
+}
+
+static void
+_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_post_index_p() &&
+       jit_thumb_p() && i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+           T2_LDRBI_A(r0, r1, i0);
+           else
+               T2_LDRBIN_A(r0, r1, -i0);
+       }
+    }
+    else if (jit_post_index_p() &&
+            !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) {
+       if (i0 >= 0)
+           LDRBI_A(r0, r1, i0);
+       else
+           LDRBIN_A(r0, r1, -i0);
+    }
+    else
+       generic_ldxai_uc(r0, r1, i0);
+}
+
+static void
+_ldxar_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_post_index_p() && !jit_thumb_p())
+       LDRSH_A(r0, r1, r2);
+    else
+       generic_ldxar_s(r0, r1, r2);
+}
+
+static void
+_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_post_index_p() && i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+           T2_LDRSHI_A(r0, r1, i0);
+           else
+               T2_LDRSHIN_A(r0, r1, -i0);
+       }
+       else {
+           if (i0 >= 0)
+               LDRSHI_A(r0, r1, i0);
+           else
+               LDRSHIN_A(r0, r1, -i0);
+       }
+    }
+    else
+       generic_ldxai_s(r0, r1, i0);
+}
+
+static void
+_ldxar_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_post_index_p() && !jit_thumb_p())
+       LDRH_A(r0, r1, r2);
+    else
+       generic_ldxar_us(r0, r1, r2);
+}
+
+static void
+_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_post_index_p() && i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+           T2_LDRHI_A(r0, r1, i0);
+           else
+               T2_LDRHIN_A(r0, r1, -i0);
+       }
+       else {
+           if (i0 >= 0)
+               LDRHI_A(r0, r1, i0);
+           else
+               LDRHIN_A(r0, r1, -i0);
+       }
+    }
+    else
+       generic_ldxai_us(r0, r1, i0);
+}
+
+static void
+_ldxar_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    if (jit_post_index_p() && !jit_thumb_p())
+       LDR_A(r0, r1, r2);
+    else
+       generic_ldxar_i(r0, r1, r2);
+}
+
+static void
+_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    if (jit_post_index_p() &&
+       jit_thumb_p() && i0 >= -255 && i0 <= 255) {
+       if (i0 >= 0)
+           T2_LDRI_A(r0, r1, i0);
+       else
+           T2_LDRIN_A(r0, r1, -i0);
+    }
+    else if (jit_post_index_p() &&
+            !jit_thumb_p() && i0 >= -4095 && i0 <= 4095) {
+       if (i0 >= 0)
+           LDRI_A(r0, r1, i0);
+       else
+           LDRIN_A(r0, r1, -i0);
+    }
+    else
+       generic_ldxai_i(r0, r1, i0);
+}
+
 static void
 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -4062,6 +4506,182 @@ _unsti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
        generic_unsti(i0, r0, i1);
 }
 
+static void
+_stxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* Thumb mode has no direct form here and goes through
+     * generic_stxbr_c(); ARM mode emits STRB_B directly. */
+    if (jit_thumb_p()) {
+        generic_stxbr_c(r0, r1, r2);
+        return;
+    }
+    STRB_B(r2, r1, r0);
+}
+
+static void
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Thumb mode handles offsets in [-255, 255], ARM mode handles
+     * [-4095, 4095]; negative offsets are passed as a magnitude to the
+     * *N_B variants.  Out-of-range offsets use generic_stxbi_c(). */
+    if (jit_thumb_p()) {
+        if (i0 >= -255 && i0 <= 255) {
+            if (i0 < 0)
+                T2_STRBIN_B(r1, r0, -i0);
+            else
+                T2_STRBI_B(r1, r0, i0);
+            return;
+        }
+    }
+    else if (i0 >= -4095 && i0 <= 4095) {
+        if (i0 < 0)
+            STRBIN_B(r1, r0, -i0);
+        else
+            STRBI_B(r1, r0, i0);
+        return;
+    }
+    generic_stxbi_c(i0, r0, r1);
+}
+
+static void
+_stxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* ARM mode emits STRH_B directly; Thumb mode defers to
+     * generic_stxbr_s(). */
+    if (jit_thumb_p()) {
+        generic_stxbr_s(r0, r1, r2);
+        return;
+    }
+    STRH_B(r2, r1, r0);
+}
+
+static void
+_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Only offsets in [-255, 255] have a direct form, in either mode. */
+    if (i0 < -255 || i0 > 255) {
+        generic_stxbi_s(i0, r0, r1);
+        return;
+    }
+    /* Negative offsets use the *N_B variants, which take the magnitude. */
+    if (i0 < 0) {
+        if (jit_thumb_p())
+            T2_STRHIN_B(r1, r0, -i0);
+        else
+            STRHIN_B(r1, r0, -i0);
+    }
+    else {
+        if (jit_thumb_p())
+            T2_STRHI_B(r1, r0, i0);
+        else
+            STRHI_B(r1, r0, i0);
+    }
+}
+
+static void
+_stxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* ARM mode emits STR_B directly; Thumb mode defers to
+     * generic_stxbr_i(). */
+    if (jit_thumb_p()) {
+        generic_stxbr_i(r0, r1, r2);
+        return;
+    }
+    STR_B(r2, r1, r0);
+}
+
+static void
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Thumb mode handles offsets in [-255, 255], ARM mode handles
+     * [-4095, 4095]; negative offsets are passed as a magnitude to the
+     * *N_B variants.  Out-of-range offsets use generic_stxbi_i(). */
+    if (jit_thumb_p()) {
+        if (i0 >= -255 && i0 <= 255) {
+            if (i0 < 0)
+                T2_STRIN_B(r1, r0, -i0);
+            else
+                T2_STRI_B(r1, r0, i0);
+            return;
+        }
+    }
+    else if (i0 >= -4095 && i0 <= 4095) {
+        if (i0 < 0)
+            STRIN_B(r1, r0, -i0);
+        else
+            STRI_B(r1, r0, i0);
+        return;
+    }
+    generic_stxbi_i(i0, r0, r1);
+}
+
+static void
+_stxar_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* STRB_A needs post-index support and is not used in Thumb mode;
+     * all other cases go through generic_stxar_c(). */
+    if (!jit_post_index_p() || jit_thumb_p()) {
+        generic_stxar_c(r0, r1, r2);
+        return;
+    }
+    STRB_A(r2, r1, r0);
+}
+
+static void
+_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* With post-index support, Thumb mode accepts offsets in [-255, 255]
+     * and ARM mode accepts [-4095, 4095]; negative offsets are passed as
+     * a magnitude to the *N_A variants.  Everything else falls through
+     * to generic_stxai_c(). */
+    if (jit_post_index_p()) {
+        if (jit_thumb_p()) {
+            if (i0 >= -255 && i0 <= 255) {
+                if (i0 < 0)
+                    T2_STRBIN_A(r1, r0, -i0);
+                else
+                    T2_STRBI_A(r1, r0, i0);
+                return;
+            }
+        }
+        else if (i0 >= -4095 && i0 <= 4095) {
+            if (i0 < 0)
+                STRBIN_A(r1, r0, -i0);
+            else
+                STRBI_A(r1, r0, i0);
+            return;
+        }
+    }
+    generic_stxai_c(i0, r0, r1);
+}
+
+static void
+_stxar_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* STRH_A needs post-index support and is not used in Thumb mode;
+     * all other cases go through generic_stxar_s(). */
+    if (!jit_post_index_p() || jit_thumb_p()) {
+        generic_stxar_s(r0, r1, r2);
+        return;
+    }
+    STRH_A(r2, r1, r0);
+}
+
+static void
+_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Halfword store with after-increment by the immediate i0.
+     *
+     * The halfword immediate forms only carry an 8-bit offset magnitude
+     * in both ARM and Thumb-2 encodings (unlike the byte/word forms,
+     * whose ARM encoding has 12 bits), so the direct path is limited to
+     * [-255, 255] in either mode, exactly as _stxbi_s, _ldxai_s and
+     * _ldxai_us do.  Larger offsets must use generic_stxai_s(). */
+    if (jit_post_index_p() && i0 >= -255 && i0 <= 255) {
+       if (jit_thumb_p()) {
+           if (i0 >= 0)
+               T2_STRHI_A(r1, r0, i0);
+           else
+               T2_STRHIN_A(r1, r0, -i0);   /* *N variant takes |i0| */
+       }
+       else {
+           if (i0 >= 0)
+               STRHI_A(r1, r0, i0);
+           else
+               STRHIN_A(r1, r0, -i0);      /* *N variant takes |i0| */
+       }
+    }
+    else
+       generic_stxai_s(i0, r0, r1);
+}
+
+static void
+_stxar_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    /* STR_A needs post-index support and is not used in Thumb mode;
+     * all other cases go through generic_stxar_i(). */
+    if (!jit_post_index_p() || jit_thumb_p()) {
+        generic_stxar_i(r0, r1, r2);
+        return;
+    }
+    STR_A(r2, r1, r0);
+}
+
+static void
+_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* With post-index support, Thumb mode accepts offsets in [-255, 255]
+     * and ARM mode accepts [-4095, 4095]; negative offsets are passed as
+     * a magnitude to the *N_A variants.  Everything else falls through
+     * to generic_stxai_i(). */
+    if (jit_post_index_p()) {
+        if (jit_thumb_p()) {
+            if (i0 >= -255 && i0 <= 255) {
+                if (i0 < 0)
+                    T2_STRIN_A(r1, r0, -i0);
+                else
+                    T2_STRI_A(r1, r0, i0);
+                return;
+            }
+        }
+        else if (i0 >= -4095 && i0 <= 4095) {
+            if (i0 < 0)
+                STRIN_A(r1, r0, -i0);
+            else
+                STRI_A(r1, r0, i0);
+            return;
+        }
+    }
+    generic_stxai_i(i0, r0, r1);
+}
+
 static void
 _bswapr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -4548,11 +5168,11 @@ _patch_at(jit_state_t *_jit,
        assert((thumb.i & 0x0f700000) == ARM_LDRI);
        d = label - (instr + 8);
        if (d < 0) {
-           thumb.i &= ~ARM_P;
+           thumb.i &= ~ARM_U;
            d = -d;
        }
        else
-           thumb.i |= ARM_P;
+           thumb.i |= ARM_U;
        assert(!(d & 0xfffff000));
        u.i[0] = (thumb.i & 0xfffff000) | d;
     }
index 7ec5e9e..cbbfd59 100644 (file)
     12,        /* hmuli */
     4, /* hmulr_u */
     8, /* hmuli_u */
+    8, /* ldxbr_c */
+    4, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    4, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    12, /* ldxbr_f */
+    12, /* ldxbi_f */
+    12, /* ldxar_f */
+    12, /* ldxai_f */
+    20, /* ldxbr_d */
+    20, /* ldxbi_d */
+    20, /* ldxar_d */
+    20, /* ldxai_d */
+    8, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    12, /* stxbr_f */
+    12, /* stxbi_f */
+    12, /* stxar_f */
+    12, /* stxai_f */
+    20, /* stxbr_d */
+    20, /* stxbi_d */
+    20, /* stxar_d */
+    20, /* stxai_d */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
 
     12,        /* hmuli */
     4, /* hmulr_u */
     8, /* hmuli_u */
+    8, /* ldxbr_c */
+    4, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    4, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __ARM_PCS_VFP */
 #endif /* __WORDSIZE */
index adf6a3b..d48eb45 100644 (file)
 #  define ARM_VMVNI                    0x02800030
 #  define ARM_VLDR                     0x0d100a00
 #  define ARM_VSTR                     0x0d000a00
-#  define ARM_VM                       0x0c000a00
+#  define ARM_VM_T1A1                  0x0c000b00
+#  define ARM_VM_T2A2                  0x0c000a00
 #  define ARM_VMOV_ADV_U               0x00800000 /* zero extend */
 #  define ARM_VMOV_ADV_8               0x00400000
 #  define ARM_VMOV_ADV_16              0x00000020
@@ -292,39 +293,7 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
 #  define VCVTR_S32_F64(r0,r1)         CC_VCVTR_S32_F64(ARM_CC_AL,r0,r1)
 #  define CC_VCVTR_U32_F64(cc,r0,r1)   cc_vo_ss(cc,ARM_VCVTR_U32_F64,r0,r1)
 #  define VCVTR_U32_F64(r0,r1)         CC_VCVTR_U32_F64(ARM_CC_AL,r0,r1)
-#  define CC_VLDMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I,r0,r1,i0)
-#  define VLDMIA_F32(r0,r1,i0)         CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_V_F64,r0,r1,i0)
-#  define VLDMIA_F64(r0,r1,i0)         CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_I,r0,r1,i0)
-#  define VSTMIA_F32(r0,r1,i0)         CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_V_F64,r0,r1,i0)
-#  define VSTMIA_F64(r0,r1,i0)         CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U,r0,r1,i0)
-#  define VLDMIA_U_F32(r0,r1,i0)       CC_VLDMIA_U_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
-#  define VLDMIA_U_F64(r0,r1,i0)       CC_VLDMIA_U_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTMIA_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U,r0,r1,i0)
-#  define VSTMIA_U_F32(r0,r1,i0)       CC_VSTMIA_U_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTMIA_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_I|ARM_M_U|ARM_V_F64,r0,r1,i0)
-#  define VSTMIA_U_F64(r0,r1,i0)       CC_VSTMIA_U_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U,r0,r1,i0)
-#  define VLDMDB_U_F32(r0,r1,i0)       CC_VLDMDB_U_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_L|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
-#  define VLDMDB_U_F64(r0,r1,i0)       CC_VLDMDB_U_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTMDB_U_F32(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U,r0,r1,i0)
-#  define VSTMDB_U_F32(r0,r1,i0)       CC_VSTMDB_U_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTMDB_U_F64(cc,r0,r1,i0) cc_vorsl(cc,ARM_VM|ARM_M_B|ARM_M_U|ARM_V_F64,r0,r1,i0)
-#  define VSTMDB_U_F64(r0,r1,i0)       CC_VSTMDB_U_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VPUSH_F32(cc,r0,i0)       CC_VSTMDB_U_F32(cc,_SP_REGNO,r0,i0)
-#  define VPUSH_F32(r0,i0)             CC_VPUSH_F32(ARM_CC_AL,r0,i0)
-#  define CC_VPUSH_F64(cc,r0,i0)       CC_VSTMDB_U_F64(cc,_SP_REGNO,r0,i0)
-#  define VPUSH_F64(r0,i0)             CC_VPUSH_F64(ARM_CC_AL,r0,i0)
-#  define CC_VPOP_F32(cc,r0,i0)                CC_VLDMIA_U_F32(cc,_SP_REGNO,r0,i0)
-#  define VPOP_F32(r0,i0)              CC_VPOP_F32(ARM_CC_AL,r0,i0)
-#  define CC_VPOP_F64(cc,r0,i0)                CC_VLDMIA_U_F64(cc,_SP_REGNO,r0,i0)
-#  define VPOP_F64(r0,i0)              CC_VPOP_F64(ARM_CC_AL,r0,i0)
-#  define CC_VMOV_A_S8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
+#  define CC_VMOV_A_S8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8,r0,r1)
 #  define VMOV_A_S8(r0,r1)             CC_VMOV_A_S8(ARM_CC_AL,r0,r1)
 #  define CC_VMOV_A_U8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_8|ARM_VMOV_ADV_U,r0,r1)
 #  define VMOV_A_U8(r0,r1)             CC_VMOV_A_U8(ARM_CC_AL,r0,r1)
@@ -336,6 +305,14 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
 #  define VMOV_A_S32(r0,r1)            CC_VMOV_A_S32(ARM_CC_AL,r0,r1)
 #  define CC_VMOV_A_U32(cc,r0,r1)      cc_vori_(cc,ARM_VMOV_A_D|ARM_VMOV_ADV_U,r0,r1)
 #  define VMOV_A_U32(r0,r1)            CC_VMOV_A_U32(ARM_CC_AL,r0,r1)
+#  define CC_VLDMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM_T2A2|ARM_M_L|ARM_M_U|ARM_M_W,r0,r1,i0)
+#  define VLDMIA_F32(r0,r1,i0)         CC_VLDMIA_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_F32(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM_T2A2|ARM_M_U|ARM_M_W,r0,r1,i0)
+#  define VSTMIA_F32(r0,r1,i0)         CC_VSTMIA_F32(ARM_CC_AL,r0,r1,i0)
+#  define CC_VLDMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM_T1A1|ARM_M_L|ARM_M_U|ARM_M_W,r0,r1,(i0)<<1)
+#  define VLDMIA_F64(r0,r1,i0)         CC_VLDMIA_F64(ARM_CC_AL,r0,r1,i0)
+#  define CC_VSTMIA_F64(cc,r0,r1,i0)   cc_vorsl(cc,ARM_VM_T1A1|ARM_M_U|ARM_M_W,r0,r1,(i0)<<1)
+#  define VSTMIA_F64(r0,r1,i0)         CC_VSTMIA_F64(ARM_CC_AL,r0,r1,i0)
 #  define CC_VMOV_V_I8(cc,r0,r1)       cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_8,r1,r0)
 #  define VMOV_V_I8(r0,r1)             CC_VMOV_V_I8(ARM_CC_AL,r0,r1)
 #  define CC_VMOV_V_I16(cc,r0,r1)      cc_vorv_(cc,ARM_VMOV_D_A|ARM_VMOV_ADV_16,r1,r0)
@@ -475,19 +452,19 @@ static void _cc_vorsl(jit_state_t*,int,int,int,int,int);
 /* index is multiplied by four */
 #  define CC_VLDRN_F32(cc,r0,r1,i0)    cc_vldst(cc,ARM_VLDR,r0,r1,i0)
 #  define VLDRN_F32(r0,r1,i0)          CC_VLDRN_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_P,r0,r1,i0)
+#  define CC_VLDR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_U,r0,r1,i0)
 #  define VLDR_F32(r0,r1,i0)           CC_VLDR_F32(ARM_CC_AL,r0,r1,i0)
 #  define CC_VLDRN_F64(cc,r0,r1,i0)    cc_vldst(cc,ARM_VLDR|ARM_V_F64,r0,r1,i0)
 #  define VLDRN_F64(r0,r1,i0)          CC_VLDRN_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VLDR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_P,r0,r1,i0)
+#  define CC_VLDR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VLDR|ARM_V_F64|ARM_U,r0,r1,i0)
 #  define VLDR_F64(r0,r1,i0)           CC_VLDR_F64(ARM_CC_AL,r0,r1,i0)
 #  define CC_VSTRN_F32(cc,r0,r1,i0)    cc_vldst(cc,ARM_VSTR,r0,r1,i0)
 #  define VSTRN_F32(r0,r1,i0)          CC_VSTRN_F32(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_P,r0,r1,i0)
+#  define CC_VSTR_F32(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_U,r0,r1,i0)
 #  define VSTR_F32(r0,r1,i0)           CC_VSTR_F32(ARM_CC_AL,r0,r1,i0)
 #  define CC_VSTRN_F64(cc,r0,r1,i0)    cc_vldst(cc,ARM_VSTR|ARM_V_F64,r0,r1,i0)
 #  define VSTRN_F64(r0,r1,i0)          CC_VSTRN_F64(ARM_CC_AL,r0,r1,i0)
-#  define CC_VSTR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_P,r0,r1,i0)
+#  define CC_VSTR_F64(cc,r0,r1,i0)     cc_vldst(cc,ARM_VSTR|ARM_V_F64|ARM_U,r0,r1,i0)
 #  define VSTR_F64(r0,r1,i0)           CC_VSTR_F64(ARM_CC_AL,r0,r1,i0)
 #  define vfp_popcntr(r0,r1)           _vfp_popcntr(_jit,r0,r1)
 static void _vfp_popcntr(jit_state_t*,jit_int32_t,jit_int32_t);
@@ -852,6 +829,10 @@ static void _vfp_ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _vfp_unldr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define vfp_unldi_x(r0, i0, i1)      _vfp_unldi_x(_jit, r0, i0, i1)
 static void _vfp_unldi_x(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+/* After-increment VFP loads.  The prototypes use the same parameter types
+ * as the definitions: (state, destination register, base register,
+ * immediate increment). */
+#  define vfp_ldxai_f(r0, r1, i0)      _vfp_ldxai_f(_jit, r0, r1, i0)
+static void _vfp_ldxai_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define vfp_ldxai_d(r0, r1, i0)      _vfp_ldxai_d(_jit, r0, r1, i0)
+static void _vfp_ldxai_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define vfp_str_f(r0,r1)             VSTR_F32(r1,r0,0)
 #  define vfp_str_d(r0,r1)             VSTR_F64(r1,r0,0)
 #  define vfp_sti_f(i0,r0)             _vfp_sti_f(_jit,i0,r0)
@@ -870,6 +851,10 @@ static void _vfp_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static void _vfp_unstr_x(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #define vfp_unsti_x(i0, r0, i1)                _vfp_unsti_x(_jit, i0, r0, i1)
 static void _vfp_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
+/* After-increment VFP stores: (state, immediate increment, base register,
+ * source register), matching the _vfp_stxai_f/_vfp_stxai_d definitions. */
+#  define vfp_stxai_f(i0, r0, r1)      _vfp_stxai_f(_jit, i0, r0, r1)
+static void _vfp_stxai_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define vfp_stxai_d(i0, r0, r1)      _vfp_stxai_d(_jit, i0, r0, r1)
+static void _vfp_stxai_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  define vfp_vaarg_d(r0, r1)          _vfp_vaarg_d(_jit, r0, r1)
 static void _vfp_vaarg_d(jit_state_t*, jit_int32_t, jit_int32_t);
 #endif
@@ -1273,11 +1258,9 @@ _cc_vorsl(jit_state_t *_jit, int cc, int o, int r0, int r1, int i0)
     jit_thumb_t        thumb;
     assert(!(cc & 0x0fffffff));
     assert(!(o  & 0xf00ff0ff));
-    /* save i0 double precision registers */
-    if (o & ARM_V_F64)         i0 <<= 1;
-    /* if (r1 & 1) cc & ARM_V_F64 must be false */
-    if (r1 & 1)        o |= ARM_V_D;   r1 = vfp_regno(r1);
-    assert(i0 && !(i0 & 1) && r1 + i0 <= 32);
+    assert(!(r1 & 1));
+    r1 = vfp_regno(r1);
+    assert(i0 && r1 + i0 <= 32);
     thumb.i = cc|o|(_u4(r0)<<16)|(_u4(r1)<<12)|_u8(i0);
     if (jit_thumb_p())
        iss(thumb.s[0], thumb.s[1]);
@@ -2615,6 +2598,30 @@ _vfp_unldi_x(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0, jit_word_t i1)
     }
 }
 
+static void
+_vfp_ldxai_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Single-precision load with after-increment: load r0 from the
+     * address in r1, then advance r1 by i0. */
+    assert(jit_fpr_p(r0));
+    if (i0 == 4)
+       /* Increment equals the element size: one VLDMIA of a single
+        * register does the load and the base update together. */
+       VLDMIA_F32(r1, r0, 1);
+    else {
+       /* The load must use the address *before* the increment, the same
+        * order the software-float path uses for ldxai_f. */
+       vfp_ldr_f(r0, r1);
+       addi(r1, r1, i0);
+    }
+}
+
+static void
+_vfp_ldxai_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Double-precision load with after-increment: load r0 from the
+     * address in r1, then advance r1 by i0. */
+    assert(jit_fpr_p(r0));
+    if (i0 == 8)
+       /* Increment equals the element size: one VLDMIA of a single
+        * register does the load and the base update together. */
+       VLDMIA_F64(r1, r0, 1);
+    else {
+       /* The load must use the address *before* the increment, the same
+        * order the software-float path uses for ldxai_d. */
+       vfp_ldr_d(r0, r1);
+       addi(r1, r1, i0);
+    }
+}
+
 static void
 _vfp_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -2848,6 +2855,30 @@ _vfp_unsti_x(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
     }
 }
 
+static void
+_vfp_stxai_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Single-precision store with after-increment: store r1 at the
+     * address in r0, then advance r0 by i0. */
+    assert(jit_fpr_p(r1));
+    if (i0 == 4)
+       /* Increment equals the element size: one VSTMIA of a single
+        * register does the store and the base update together. */
+       VSTMIA_F32(r0, r1, 1);
+    else {
+       /* The store must use the address *before* the increment, the same
+        * order the software-float path uses for stxai_f. */
+       vfp_str_f(r0, r1);
+       addi(r0, r0, i0);
+    }
+}
+
+static void
+_vfp_stxai_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Double-precision store with after-increment: store r1 at the
+     * address in r0, then advance r0 by i0. */
+    assert(jit_fpr_p(r1));
+    if (i0 == 8)
+       /* Increment equals the element size: one VSTMIA of a single
+        * register does the store and the base update together. */
+       VSTMIA_F64(r0, r1, 1);
+    else {
+       /* The store must use the address *before* the increment, the same
+        * order the software-float path uses for stxai_d. */
+       vfp_str_d(r0, r1);
+       addi(r0, r0, i0);
+    }
+}
+
 static void
 _vfp_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
index 25aa7cb..d2bee76 100644 (file)
@@ -1352,6 +1352,24 @@ _emit_code(jit_state_t *_jit)
                                        rn(node->v.q.h), rn(node->w.w));\
            case jit_code_##name##i##type:                              \
                break
+/* case_rrx: case for jit_code_<name>i<type>; calls <name>i<type>() with two
+ * mapped registers (u, v) followed by the raw immediate w. */
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+              break
+/* case_rrX: case for jit_code_<name>r<type>; all three operands (u, v, w)
+ * are mapped through rn(). */
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+/* case_xrr: case for jit_code_<name>i<type>; the raw immediate u comes
+ * first, followed by two mapped registers (v, w). */
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+/* case_Xrr: register form paired with case_xrr; it expands to the same
+ * call as case_rrX (u, v, w all mapped through rn()). */
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w),             \
+                             rn(node->w.w));                           \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1667,6 +1685,68 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+           case jit_code_ldxbr_f:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               goto L_ldxbi_f;
+           case jit_code_ldxbi_f:
+               addi(rn(node->v.w), rn(node->v.w), node->w.w);
+           L_ldxbi_f:
+               if (jit_swf_p())
+                   swf_ldr_f(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_ldr_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_ldxar_f:
+               if (jit_swf_p())
+                   swf_ldr_f(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_ldr_f(rn(node->u.w), rn(node->v.w));
+               addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_ldxai_f:
+               if (jit_swf_p()) {
+                   swf_ldr_f(rn(node->u.w), rn(node->v.w));
+                   addi(rn(node->v.w), rn(node->v.w), node->w.w);
+               }
+               else
+                   vfp_ldxai_f(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_ldxbr_d:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               goto L_ldxbi_d;
+           case jit_code_ldxbi_d:
+               addi(rn(node->v.w), rn(node->v.w), node->w.w);
+           L_ldxbi_d:
+               if (jit_swf_p())
+                   swf_ldr_d(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_ldr_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_ldxar_d:
+               if (jit_swf_p())
+                   swf_ldr_d(rn(node->u.w), rn(node->v.w));
+               else
+                   vfp_ldr_d(rn(node->u.w), rn(node->v.w));
+               addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_ldxai_d:
+               if (jit_swf_p()) {
+                   swf_ldr_d(rn(node->u.w), rn(node->v.w));
+                   addi(rn(node->v.w), rn(node->v.w), node->w.w);
+               }
+               else
+                   vfp_ldxai_d(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1685,6 +1765,64 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+           case jit_code_stxbr_f:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               goto L_stxbi_f;
+           case jit_code_stxbi_f:
+               addi(rn(node->v.w), rn(node->v.w), node->u.w);
+           L_stxbi_f:
+               if (jit_swf_p())
+                   swf_str_f(rn(node->v.w), rn(node->w.w));
+               else
+                   vfp_str_f(rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_stxar_f:
+               if (jit_swf_p())
+                   swf_str_f(rn(node->v.w), rn(node->w.w));
+               else
+                   vfp_str_f(rn(node->v.w), rn(node->w.w));
+               addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               break;
+           case jit_code_stxai_f:
+               if (jit_swf_p()) {
+                   swf_str_f(rn(node->v.w), rn(node->w.w));
+                   addi(rn(node->v.w), rn(node->v.w), node->u.w);
+               }
+               else
+                   vfp_stxai_f(node->u.w, rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_stxbr_d:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               goto L_stxbi_d;
+           case jit_code_stxbi_d:
+               addi(rn(node->v.w), rn(node->v.w), node->u.w);
+           L_stxbi_d:
+               if (jit_swf_p())
+                   swf_str_d(rn(node->v.w), rn(node->w.w));
+               else
+                   vfp_str_d(rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_stxar_d:
+               if (jit_swf_p())
+                   swf_str_d(rn(node->v.w), rn(node->w.w));
+               else
+                   vfp_str_d(rn(node->v.w), rn(node->w.w));
+               addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               break;
+           case jit_code_stxai_d:
+               if (jit_swf_p()) {
+                   swf_str_d(rn(node->v.w), rn(node->w.w));
+                   addi(rn(node->v.w), rn(node->v.w), node->u.w);
+               }
+               else
+                   vfp_stxai_d(node->u.w, rn(node->v.w), rn(node->w.w));
+               break;
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(bswap, _us);
@@ -2327,6 +2465,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_vvw
 #undef case_rrw
 #undef case_vvv
+#undef case_rrx
+#undef case_rrX
+#undef case_xrr
+#undef case_Xrr
 #undef case_rrr
 #undef case_wv
 #undef case_wr
@@ -2353,7 +2495,7 @@ _emit_code(jit_state_t *_jit)
             * FIXME can this cause issues in the preprocessor prefetch
             * or something else? should not, as the constants are after
             * an unconditional jump */
-           if (value & ARM_P)  value =   value & 0x00000fff;
+           if (value & ARM_U)  value =   value & 0x00000fff;
            else                value = -(value & 0x00000fff);
            word = word + 8 + value;
        }
index 90d90b0..7866f2e 100644 (file)
@@ -112,6 +112,12 @@ jit_init_debug(const char *progname, FILE *stream)
 #  if defined(__s390__) || defined(__s390x__)
     disasm_info.disassembler_options = "zarch";
 #  endif
+#  if defined(__sh__)
+    disasm_info.arch = bfd_arch_sh;
+    disasm_info.mach = bfd_mach_sh4;
+    disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_LITTLE;
+#  endif
+
     disasm_info.print_address_func = disasm_print_address;
 
 # if BINUTILS_2_29
index 3a471eb..ce7bffd 100644 (file)
@@ -302,6 +302,10 @@ static void _fallback_unsti_x(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 #    define fallback_patch_bmsi(inst, lbl)                             \
        patch_at(inst, lbl)
 #  endif
+#  if __WORDSIZE == 32
+#    define fallback_divi_u(r0,r1,i0)  _fallback_divi_u(_jit,r0,r1,i0)
+static void _fallback_divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
 #endif
 
 #if CODE
@@ -4241,4 +4245,31 @@ _fallback_unsti_x(jit_state_t *_jit,
     jit_unget_reg(t0);
 }
 #  endif
+
+#  if __WORDSIZE == 32
+/*
+ * Emit an unsigned division of r1 by the constant i0 into r0 without a
+ * divide instruction (32-bit word size only).
+ *
+ *   i0 == 1           -> plain register move
+ *   i0 >= 0x80000001  -> gei_u(r0, r1, i0); presumably r0 = (r1 >= i0), the
+ *                        only possible quotients being 0 and 1 - TODO confirm
+ *   otherwise         -> multiply-high by a precomputed constant followed
+ *                        by a shift/add fix-up
+ *
+ * The divisor is handled through an unsigned copy so that the bit tricks
+ * below never overflow a signed value (i0 - 1 with i0 == 0x80000000) and
+ * the divisor is zero-extended, not sign-extended, in the 64-bit math.
+ *
+ * NOTE(review): i0 == 0 is not handled; __builtin_clz(0) is undefined and
+ * the computation of m would divide by zero.
+ */
+static void _fallback_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t t0;
+    unsigned int p, m;
+    unsigned int d = (unsigned int)i0;
+
+    if (d == 1) {
+        movr(r0, r1);
+    } else if (d >= 0x80000001u) {
+        gei_u(r0, r1, i0);
+    } else {
+        /* p = ceil(log2(d)): index of the top set bit, plus one when d is
+         * not a power of two. */
+        p = 31 - __builtin_clz(d) + !!(d & (d - 1));
+        /* m = ceil(2^(32+p) / d), truncated to unsigned int. */
+        m = (unsigned int)(((0x1ull << (32 + p)) + d - 1) / (unsigned long long)d);
+
+        t0 = fallback_jit_get_reg(jit_class_gpr);
+
+        /* r0 = (((r1 - t) >> 1) + t) >> (p - 1), with t = hmuli_u(r1, m) */
+        hmuli_u(rn(t0), r1, m);
+        subr(r0, r1, rn(t0));
+        rshi_u(r0, r0, 1);
+        addr(r0, r0, rn(t0));
+        rshi_u(r0, r0, p - 1);
+
+        jit_unget_reg(t0);
+    }
+}
+#  endif
 #endif
index 7a23ebd..e621cb7 100644 (file)
     40,        /* hmuli */
     48,        /* hmulr_u */
     56,        /* hmuli_u */
+    12, /* ldxbr_c */
+    12, /* ldxbi_c */
+    12, /* ldxar_c */
+    12, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    12, /* ldxbr_s */
+    12, /* ldxbi_s */
+    12, /* ldxar_s */
+    12, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index 6330bf6..60f7786 100644 (file)
@@ -912,6 +912,26 @@ _emit_code(jit_state_t *_jit)
                              rn(node->v.q.h), rn(node->w.w));          \
            case jit_code_##name##i##type:                              \
                break;
+/* case_rrx: case for jit_code_<name>i<type>; calls generic_<name>i<type>()
+ * with two mapped registers (u, v) followed by the raw immediate w. */
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+/* case_rrX: case for jit_code_<name>r<type>; calls generic_<name>r<type>()
+ * with all three operands (u, v, w) mapped through rn(). */
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+/* case_xrr: case for jit_code_<name>i<type>; the raw immediate u comes
+ * first, followed by two mapped registers (v, w). */
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+/* case_Xrr: register form paired with case_xrr; it expands to the same
+ * call as case_rrX (u, v, w all mapped through rn()). */
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1195,6 +1215,20 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1215,6 +1249,16 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
@@ -1688,6 +1732,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_rrrw
 #undef case_rrw
 #undef case_rrrr
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index 9033334..5a397b5 100644 (file)
@@ -1,5 +1,5 @@
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 144
+#define JIT_INSTR_MAX 208
     0, /* data */
     0, /* live */
     32,        /* align */
     48,        /* unldi */
     96,        /* unldr_u */
     48,        /* unldi_u */
-    128,       /* unstr */
-    96,        /* unsti */
+    192,       /* unstr */
+    64,        /* unsti */
     80,        /* unldr_x */
     48,        /* unldi_x */
-    144,       /* unstr_x */
-    112,       /* unsti_x */
+    208,       /* unstr_x */
+    64,        /* unsti_x */
     16,        /* fmar_f */
     0, /* fmai_f */
     16,        /* fmsr_f */
     0, /* fnmai_d */
     16,        /* fnmsr_d */
     0, /* fnmsi_d */
-    32, /* hmulr */
-    32, /* hmuli */
-    32, /* hmulr_u */
-    32, /* hmuli_u */
+    32,        /* hmulr */
+    32,        /* hmuli */
+    32,        /* hmulr_u */
+    32,        /* hmuli_u */
+    16,        /* ldxbr_c */
+    16,        /* ldxbi_c */
+    16,        /* ldxar_c */
+    16,        /* ldxai_c */
+    16,        /* ldxbr_uc */
+    16,        /* ldxbi_uc */
+    16,        /* ldxar_uc */
+    16,        /* ldxai_uc */
+    16,        /* ldxbr_s */
+    16,        /* ldxbi_s */
+    16,        /* ldxar_s */
+    16,        /* ldxai_s */
+    16,        /* ldxbr_us */
+    16,        /* ldxbi_us */
+    16,        /* ldxar_us */
+    16,        /* ldxai_us */
+    16,        /* ldxbr_i */
+    16,        /* ldxbi_i */
+    16,        /* ldxar_i */
+    16,        /* ldxai_i */
+    16,        /* ldxbr_ui */
+    16,        /* ldxbi_ui */
+    16,        /* ldxar_ui */
+    16,        /* ldxai_ui */
+    16,        /* ldxbr_l */
+    16,        /* ldxbi_l */
+    16,        /* ldxar_l */
+    16,        /* ldxai_l */
+    16,        /* ldxbr_f */
+    16,        /* ldxbi_f */
+    16,        /* ldxar_f */
+    16,        /* ldxai_f */
+    16,        /* ldxbr_d */
+    16,        /* ldxbi_d */
+    16,        /* ldxar_d */
+    16,        /* ldxai_d */
+    16,        /* stxbr_c */
+    16,        /* stxbi_c */
+    16,        /* stxar_c */
+    16,        /* stxai_c */
+    16,        /* stxbr_s */
+    16,        /* stxbi_s */
+    16,        /* stxar_s */
+    16,        /* stxai_s */
+    16,        /* stxbr_i */
+    16,        /* stxbi_i */
+    16,        /* stxar_i */
+    16,        /* stxai_i */
+    16,        /* stxbr_l */
+    16,        /* stxbi_l */
+    16,        /* stxar_l */
+    16,        /* stxai_l */
+    16,        /* stxbr_f */
+    16,        /* stxbi_f */
+    16,        /* stxar_f */
+    16,        /* stxai_f */
+    16,        /* stxbr_d */
+    16,        /* stxbi_d */
+    16,        /* stxar_d */
+    16,        /* stxai_d */
 #endif /* __WORDSIZE */
index f689231..a711a8b 100644 (file)
@@ -1043,6 +1043,26 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+/* Switch-case helpers for the generic_* emitters.  Each macro expands to one
+ * case label, one call and a "break" without a trailing semicolon, so a use
+ * is written as "case_rrx(ldxb, _c);".
+ */
+/* case_rrx: "i" code; u.w and v.w go through rn(), w.w is passed as-is. */
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+/* case_rrX: "r" code; u.w, v.w and w.w all go through rn(). */
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+/* case_xrr: "i" code; u.w is passed as-is, v.w and w.w go through rn(). */
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+/* case_Xrr: "r" code; its expansion is the same as case_rrX. */
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1344,6 +1364,24 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1366,6 +1404,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
@@ -1897,6 +1947,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_rrf
 #undef case_rrw
 #undef case_rrrr
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index 18e73aa..6eaf397 100644 (file)
     20,        /* hmuli */
     4, /* hmulr_u */
     20,        /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    8, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    8, /* ldxbr_ui */
+    8, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    8, /* ldxai_ui */
+    8, /* ldxbr_l */
+    8, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    8, /* stxbr_l */
+    8, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index cd38c4e..7b49819 100644 (file)
@@ -963,6 +963,26 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+/* Switch-case helpers for the generic_* emitters.  Each macro expands to one
+ * case label, one call and a "break" without a trailing semicolon, so a use
+ * is written as "case_rrx(ldxb, _c);".
+ */
+/* case_rrx: "i" code; u.w and v.w go through rn(), w.w is passed as-is. */
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+/* case_rrX: "r" code; u.w, v.w and w.w all go through rn(). */
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+/* case_xrr: "i" code; u.w is passed as-is, v.w and w.w go through rn(). */
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+/* case_Xrr: "r" code; its expansion is the same as case_rrX. */
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1203,6 +1223,24 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1225,6 +1263,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
@@ -1813,6 +1863,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_brr
 #undef case_wrr
 #undef case_rrw
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index 61db30e..8ad97e9 100644 (file)
@@ -349,7 +349,7 @@ static void _movi_f(jit_state_t*,jit_int32_t,jit_float32_t*);
 #    define movi64(r0, i0)             _movi64(_jit, r0, i0)
 static void _movi64(jit_state_t*,jit_int32_t,jit_int64_t);
 #    define movi_d_w(r0, i0)           _movi_d_w(_jit, r0, i0)
-static void _movi_d_w(jit_state_t*,jit_int32_t,jit_int64_t);
+static void _movi_d_w(jit_state_t*,jit_int32_t,jit_float64_t);
 #  elif __WORDSIZE == 64
 #    define movi64(r0, i0)             movi(r0, i0)
 #  endif
@@ -1152,7 +1152,7 @@ _movi64(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0)
 }
 
 static void
-_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_int64_t i0)
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
 {
     union {
        jit_int64_t     l;
index 156fc95..cde9cb7 100644 (file)
     16,        /* hmuli */
     8, /* hmulr_u */
     16,        /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    8, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    12, /* ldxbr_d */
+    12, /* ldxbi_d */
+    12, /* ldxar_d */
+    12, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    12, /* stxbr_d */
+    12, /* stxbi_d */
+    12, /* stxar_d */
+    12, /* stxai_d */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
     28,        /* hmuli */
     8, /* hmulr_u */
     28,        /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    8, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    8, /* ldxbr_ui */
+    8, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    8, /* ldxai_ui */
+    8, /* ldxbr_l */
+    8, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    8, /* stxbr_l */
+    8, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index 1fec109..6eb41a0 100644 (file)
@@ -1424,6 +1424,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_##name##i##type:                              \
                name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
                break
+/* Switch-case helpers for the generic_* emitters.  Each macro expands to one
+ * case label, one call and a "break" without a trailing semicolon, so a use
+ * is written as "case_rrx(ldxb, _c);".
+ */
+/* case_rrx: "i" code; u.w and v.w go through rn(), w.w is passed as-is. */
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+/* case_rrX: "r" code; u.w, v.w and w.w all go through rn(). */
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+/* case_xrr: "i" code; u.w is passed as-is, v.w and w.w go through rn(). */
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+/* case_Xrr: "r" code; its expansion is the same as case_rrX. */
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1645,6 +1665,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+#if __WORDSIZE == 64
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+#endif
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1671,6 +1711,21 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+#if __WORDSIZE == 64
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+#endif
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
 #if __WORDSIZE == 64
@@ -2129,8 +2184,7 @@ _emit_code(jit_state_t *_jit)
            case jit_code_movi_w_d:
                movi_w_d(rn(node->u.w), node->v.w);
                break;
-#endif
-#if __WORDSIZE == 32
+#else
            case jit_code_movr_ww_d:
                movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
                break;
@@ -2320,6 +2374,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_brr
 #undef case_wrr
 #undef case_rrf
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrw
 #undef case_rrr
 #undef case_wr
index 88bc717..e72e549 100644 (file)
@@ -291,4 +291,34 @@ static char *code_name[] = {
     "fnmsr_d",         "fnmsi_d",
     "hmulr",           "hmuli",
     "hmulr_u",         "hmuli_u",
+    "ldxbr_c",         "ldxbi_c",
+    "ldxar_c",         "ldxai_c",
+    "ldxbr_uc",                "ldxbi_uc",
+    "ldxar_uc",                "ldxai_uc",
+    "ldxbr_s",         "ldxbi_s",
+    "ldxar_s",         "ldxai_s",
+    "ldxbr_us",                "ldxbi_us",
+    "ldxar_us",                "ldxai_us",
+    "ldxbr_i",         "ldxbi_i",
+    "ldxar_i",         "ldxai_i",
+    "ldxbr_ui",                "ldxbi_ui",
+    "ldxar_ui",                "ldxai_ui",
+    "ldxbr_l",         "ldxbi_l",
+    "ldxar_l",         "ldxai_l",
+    "ldxbr_f",         "ldxbi_f",
+    "ldxar_f",         "ldxai_f",
+    "ldxbr_d",         "ldxbi_d",
+    "ldxar_d",         "ldxai_d",
+    "stxbr_c",         "stxbi_c",
+    "stxar_c",         "stxai_c",
+    "stxbr_s",         "stxbi_s",
+    "stxar_s",         "stxai_s",
+    "stxbr_i",         "stxbi_i",
+    "stxar_i",         "stxai_i",
+    "stxbr_l",         "stxbi_l",
+    "stxar_l",         "stxai_l",
+    "stxbr_f",         "stxbi_f",
+    "stxar_f",         "stxai_f",
+    "stxbr_d",         "stxbi_d",
+    "stxar_d",         "stxai_d",
 };
index 8ea8e62..e1829c7 100644 (file)
@@ -301,6 +301,8 @@ static void _FXS(jit_state_t*,int,int,int,int,int,int,int);
 #  define LWZX(d,a,b)                  FX(31,d,a,b,23)
 #  define LD(d,a,s)                    FDs(58,d,a,s)
 #  define LDX(d,a,b)                   FX(31,d,a,b,21)
+/* LDU reuses LD's primary opcode (58) with the low bit of the displacement
+ * operand set; LDUX is an FX(31, ..., 53) encoding.  Presumably these are the
+ * update forms ldu/ldux -- confirm against the Power ISA.  The s argument is
+ * parenthesized so an expression argument cannot re-associate with "|". */
+#  define LDU(d,a,s)                   FDs(58,d,a,(s)|1)
+#  define LDUX(d,a,b)                  FX(31,d,a,b,53)
 #  define MCRF(d,s)                    FXL(19,((d)<<2),((s)<<2),0)
 #  if DEBUG
 /* In case instruction is emulated, check the kernel can handle it.
@@ -893,6 +895,52 @@ static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #    define ldxi_l(r0,r1,i0)           _ldxi_l(_jit,r0,r1,i0)
 static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  endif
+/* ldxb*: each macro forwards to the static _ldxb* emitter declared right
+ * after it, supplying _jit as the first argument.  The r-suffixed forms take
+ * three jit_int32_t operands (r0, r1, r2); the i-suffixed forms take two
+ * jit_int32_t operands and a jit_word_t (r0, r1, i0).  The _ui and _l
+ * variants are declared only when __WORDSIZE == 64. */
+#  define ldxbr_c(r0,r1,r2)            _ldxbr_c(_jit,r0,r1,r2)
+static void _ldxbr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_c(r0,r1,i0)            _ldxbi_c(_jit,r0,r1,i0)
+static void _ldxbi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_uc(r0,r1,r2)           _ldxbr_uc(_jit,r0,r1,r2)
+static void _ldxbr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_uc(r0,r1,i0)           _ldxbi_uc(_jit,r0,r1,i0)
+static void _ldxbi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_s(r0,r1,r2)            _ldxbr_s(_jit,r0,r1,r2)
+static void _ldxbr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_s(r0,r1,i0)            _ldxbi_s(_jit,r0,r1,i0)
+static void _ldxbi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_us(r0,r1,r2)           _ldxbr_us(_jit,r0,r1,r2)
+static void _ldxbr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_us(r0,r1,i0)           _ldxbi_us(_jit,r0,r1,i0)
+static void _ldxbi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_i(r0,r1,r2)            _ldxbr_i(_jit,r0,r1,r2)
+static void _ldxbr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_i(r0,r1,i0)            _ldxbi_i(_jit,r0,r1,i0)
+static void _ldxbi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  if __WORDSIZE == 64
+#    define ldxbr_ui(r0,r1,r2)         _ldxbr_ui(_jit,r0,r1,r2)
+static void _ldxbr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxbi_ui(r0,r1,i0)         _ldxbi_ui(_jit,r0,r1,i0)
+static void _ldxbi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldxbr_l(r0,r1,r2)          _ldxbr_l(_jit,r0,r1,r2)
+static void _ldxbr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define ldxbi_l(r0,r1,i0)          _ldxbi_l(_jit,r0,r1,i0)
+static void _ldxbi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
+/* ldxa*: no dedicated emitter is declared for these; every form forwards its
+ * arguments unchanged to the matching generic_ldxa* helper.  The _ui and _l
+ * variants exist only when __WORDSIZE == 64. */
+#  define ldxar_c(r0,r1,r2)            generic_ldxar_c(r0,r1,r2)
+#  define ldxai_c(r0,r1,i0)            generic_ldxai_c(r0,r1,i0)
+#  define ldxar_uc(r0,r1,r2)           generic_ldxar_uc(r0,r1,r2)
+#  define ldxai_uc(r0,r1,i0)           generic_ldxai_uc(r0,r1,i0)
+#  define ldxar_s(r0,r1,r2)            generic_ldxar_s(r0,r1,r2)
+#  define ldxai_s(r0,r1,i0)            generic_ldxai_s(r0,r1,i0)
+#  define ldxar_us(r0,r1,r2)           generic_ldxar_us(r0,r1,r2)
+#  define ldxai_us(r0,r1,i0)           generic_ldxai_us(r0,r1,i0)
+#  define ldxar_i(r0,r1,r2)            generic_ldxar_i(r0,r1,r2)
+#  define ldxai_i(r0,r1,i0)            generic_ldxai_i(r0,r1,i0)
+#  if __WORDSIZE == 64
+#    define ldxar_ui(r0,r1,r2)         generic_ldxar_ui(r0,r1,r2)
+#    define ldxai_ui(r0,r1,i0)         generic_ldxai_ui(r0,r1,i0)
+#    define ldxar_l(r0,r1,r2)          generic_ldxar_l(r0,r1,r2)
+#    define ldxai_l(r0,r1,i0)          generic_ldxai_l(r0,r1,i0)
+#  endif
 #  define str_c(r0,r1)                 STBX(r1, _R0_REGNO, r0)
 #  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
@@ -923,6 +971,34 @@ static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #    define stxi_l(i0,r0,r1)           _stxi_l(_jit,i0,r0,r1)
 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  endif
+/* stxb*: each macro forwards to the static _stxb* emitter declared right
+ * after it, supplying _jit as the first argument.  The r-suffixed forms take
+ * three jit_int32_t operands (r0, r1, r2); the i-suffixed forms take the
+ * jit_word_t operand first, followed by two jit_int32_t operands
+ * (i0, r0, r1).  The _l variant is declared only when __WORDSIZE == 64. */
+#  define stxbr_c(r0,r1,r2)            _stxbr_c(_jit,r0,r1,r2)
+static void _stxbr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxbi_c(i0,r0,r1)            _stxbi_c(_jit,i0,r0,r1)
+static void _stxbi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_s(r0,r1,r2)            _stxbr_s(_jit,r0,r1,r2)
+static void _stxbr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxbi_s(i0,r0,r1)            _stxbi_s(_jit,i0,r0,r1)
+static void _stxbi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_i(r0,r1,r2)            _stxbr_i(_jit,r0,r1,r2)
+static void _stxbr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxbi_i(i0,r0,r1)            _stxbi_i(_jit,i0,r0,r1)
+static void _stxbi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  if __WORDSIZE == 64
+#    define stxbr_l(r0,r1,r2)          _stxbr_l(_jit,r0,r1,r2)
+static void _stxbr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define stxbi_l(i0,r0,r1)          _stxbi_l(_jit,i0,r0,r1)
+static void _stxbi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
+/* stxa*: no dedicated emitter is declared for these; every form forwards its
+ * arguments unchanged to the matching generic_stxa* helper.  The i-suffixed
+ * forms name their parameters (i0, r0, r1), immediate first, to match the
+ * stxbi_* macros; the expansion is positional, so callers are unaffected.
+ * The _l variant exists only when __WORDSIZE == 64. */
+#  define stxar_c(r0,r1,r2)            generic_stxar_c(r0,r1,r2)
+#  define stxai_c(i0,r0,r1)            generic_stxai_c(i0,r0,r1)
+#  define stxar_s(r0,r1,r2)            generic_stxar_s(r0,r1,r2)
+#  define stxai_s(i0,r0,r1)            generic_stxai_s(i0,r0,r1)
+#  define stxar_i(r0,r1,r2)            generic_stxar_i(r0,r1,r2)
+#  define stxai_i(i0,r0,r1)            generic_stxai_i(i0,r0,r1)
+#  if __WORDSIZE == 64
+#    define stxar_l(r0,r1,r2)          generic_stxar_l(r0,r1,r2)
+#    define stxai_l(i0,r0,r1)          generic_stxai_l(i0,r0,r1)
+#  endif
 #  define jmpr(r0)                     _jmpr(_jit,r0)
 static void _jmpr(jit_state_t*,jit_int32_t);
 #  define jmpi(i0)                     _jmpi(_jit,i0)
@@ -1301,17 +1377,21 @@ _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+/* Emit code leaving the population count of r1 in r0.  When jit_cpu.popcntb
+ * is set, the POPCNTB result is multiplied by a constant holding 0x01 in
+ * every byte and shifted right by __WORDSIZE - 8 (presumably folding per-byte
+ * counts into the top byte -- confirm against the Power ISA popcntb
+ * definition).  Otherwise the work is delegated to fallback_popcnt(). */
 static void
 _popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
-    jit_int32_t                reg;
-    reg = jit_get_reg(jit_class_gpr);
-    POPCNTB(r0, r1);
+    if (jit_cpu.popcntb) {
+       jit_int32_t             reg;
+       reg = jit_get_reg(jit_class_gpr);
+       POPCNTB(r0, r1);
 #if __WORDSIZE == 32
-    movi(rn(reg), 0x01010101);
+       movi(rn(reg), 0x01010101);
 #else
-    movi(rn(reg), 0x0101010101010101);
+       movi(rn(reg), 0x0101010101010101);
 #endif
-    mullr(r0, r0, rn(reg));
-    rshi_u(r0, r0, __WORDSIZE - 8);
-    jit_unget_reg(reg);
+       mullr(r0, r0, rn(reg));
+       rshi_u(r0, r0, __WORDSIZE - 8);
+       jit_unget_reg(reg);
+    }
+    else
+       fallback_popcnt(r0, r1);
 }
 
 static void
@@ -3340,6 +3420,182 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 }
 #  endif
 
+static void            /* ldxbr_c: signed-byte variant, built on top of ldxbr_uc */
+_ldxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    ldxbr_uc(r0, r1, r2);      /* load through the unsigned-byte path first */
+    extr_c(r0, r0);            /* then extr_c on r0 in place (presumably sign-extends the byte) */
+}
+
+static void            /* ldxbi_c: immediate-offset signed-byte variant, built on ldxbi_uc */
+_ldxbi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxbi_uc(r0, r1, i0);      /* load through the unsigned-byte path first */
+    extr_c(r0, r0);            /* then extr_c on r0 in place (presumably sign-extends the byte) */
+}
+
+static void            /* ldxbr_uc: register-offset unsigned-byte load, emitted as one LBZUX */
+_ldxbr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* destination and r1 must be different registers */
+    LBZUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_uc: immediate-offset unsigned-byte load (LBZU, or ldxbr_uc fallback) */
+_ldxbi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to LBZU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* destination and r1 must be different registers */
+       LBZU(r0, r1, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_uc(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* ldxbr_s: register-offset signed-halfword load, emitted as one LHAUX */
+_ldxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* destination and r1 must be different registers */
+    LHAUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_s: immediate-offset signed-halfword load (LHAU, or ldxbr_s fallback) */
+_ldxbi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to LHAU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* destination and r1 must be different registers */
+       LHAU(r0, r1, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_s(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* ldxbr_us: register-offset unsigned-halfword load, emitted as one LHZUX */
+_ldxbr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* destination and r1 must be different registers */
+    LHZUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_us: immediate-offset unsigned-halfword load (LHZU, or ldxbr_us fallback) */
+_ldxbi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to LHZU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* destination and r1 must be different registers */
+       LHZU(r0, r1, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_us(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* ldxbr_i: register-offset 32-bit load; code differs by word size */
+_ldxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* destination and r1 must be different registers */
+#  if __WORDSIZE == 32
+    LWZUX(r0, r1, r2);         /* 32-bit build: a single LWZUX is enough */
+#  else
+    ldxbr_ui(r0, r1, r2);      /* 64-bit build: load via the _ui variant ... */
+    extr_i(r0, r0);            /* ... then extr_i on r0 in place (presumably sign-extends) */
+#  endif
+}
+
+static void            /* ldxbi_i: immediate-offset 32-bit load; code differs by word size */
+_ldxbi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be used directly as the displacement */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* destination and r1 must be different registers */
+#  if __WORDSIZE == 32
+       LWZU(r0, r1, i0);               /* 32-bit build: a single LWZU is enough */
+#  else
+       ldxbi_ui(r0, r1, i0);           /* 64-bit build: load via the _ui variant ... */
+       extr_i(r0, r0);                 /* ... then extr_i on r0 in place (presumably sign-extends) */
+#  endif
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_i(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+#  if __WORDSIZE == 64
+static void            /* ldxbr_ui (64-bit builds only): register-offset unsigned 32-bit load via LWZUX */
+_ldxbr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* destination and r1 must be different registers */
+    LWZUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_ui (64-bit builds only): immediate-offset unsigned 32-bit load (LWZU, or ldxbr_ui) */
+_ldxbi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to LWZU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* destination and r1 must be different registers */
+       LWZU(r0, r1, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_ui(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* ldxbr_l (64-bit builds only): register-offset 64-bit load, emitted as one LDUX */
+_ldxbr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* destination and r1 must be different registers */
+    LDUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_l (64-bit builds only): immediate-offset 64-bit load (LDU, or ldxbr_l fallback) */
+_ldxbi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 is passed straight to LDU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* destination and r1 must be different registers */
+       LDU(r0, r1, i0);                /* NOTE(review): ldu is DS-form; confirm i0 not a multiple of 4 is handled */
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_l(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+#  endif
+
 static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -3614,6 +3870,100 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #  endif
 
+static void            /* stxbr_c: register-offset byte store, emitted as one STBUX */
+_stxbr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    STBUX(r2, r1, r0);         /* note the reversed order: r2 first, then r1, then r0 */
+}
+
+static void            /* stxbi_c: immediate-offset byte store (STBU, or stxbr_c for large i0) */
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to STBU */
+       assert(r0 != _R0_REGNO);        /* r0 is the base STBU updates; r1 is only the stored value */
+       STBU(r1, r0, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_c(rn(reg), r0, r1);       /* (offset, base, value): same roles as the STBU path above */
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* stxbr_s: register-offset halfword store, emitted as one STHUX */
+_stxbr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    STHUX(r2, r1, r0);         /* note the reversed order: r2 first, then r1, then r0 */
+}
+
+static void            /* stxbi_s: immediate-offset halfword store (STHU, or stxbr_s for large i0) */
+_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to STHU */
+       assert(r0 != _R0_REGNO);        /* r0 is the base STHU updates; r1 is only the stored value */
+       STHU(r1, r0, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_s(rn(reg), r0, r1);       /* (offset, base, value): same roles as the STHU path above */
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* stxbr_i: register-offset 32-bit store, emitted as one STWUX */
+_stxbr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    STWUX(r2, r1, r0);         /* note the reversed order: r2 first, then r1, then r0 */
+}
+
+static void            /* stxbi_i: immediate-offset 32-bit store (STWU, or stxbr_i for large i0) */
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to STWU */
+       assert(r0 != _R0_REGNO);        /* r0 is the base STWU updates; r1 is only the stored value */
+       STWU(r1, r0, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_i(rn(reg), r0, r1);       /* (offset, base, value): same roles as the STWU path above */
+       jit_unget_reg(reg);
+    }
+}
+
+#  if __WORDSIZE == 64
+static void            /* stxbr_l (64-bit builds only): register-offset 64-bit store, emitted as one STDUX */
+_stxbr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    STDUX(r2, r1, r0);         /* note the reversed order: r2 first, then r1, then r0 */
+}
+
+static void            /* stxbi_l (64-bit builds only): immediate-offset 64-bit store (STDU, or stxbr_l) */
+_stxbi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 is passed straight to STDU */
+       assert(r0 != _R0_REGNO);        /* r0 is the base STDU updates; r1 is only the stored value */
+       STDU(r1, r0, i0);               /* NOTE(review): stdu is DS-form; confirm i0 not a multiple of 4 is handled */
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_l(rn(reg), r0, r1);       /* (offset, base, value): same roles as the STDU path above */
+       jit_unget_reg(reg);
+    }
+}
+#  endif
+
 static void
 _jmpr(jit_state_t *_jit, jit_int32_t r0)
 {
index 605bd4f..8457b69 100644 (file)
@@ -401,6 +401,18 @@ static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
 static void _ldxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
 static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_f(r0,r1,r2)            _ldxbr_f(_jit,r0,r1,r2)
+static void _ldxbr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_f(r0,r1,i0)            _ldxbi_f(_jit,r0,r1,i0)
+static void _ldxbi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxbr_d(r0,r1,r2)            _ldxbr_d(_jit,r0,r1,r2)
+static void _ldxbr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define ldxbi_d(r0,r1,i0)            _ldxbi_d(_jit,r0,r1,i0)
+static void _ldxbi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define ldxar_f(r0,r1,r2)            generic_ldxar_f(r0,r1,r2)
+#  define ldxai_f(r0,r1,i0)            generic_ldxai_f(r0,r1,i0)
+#  define ldxar_d(r0,r1,r2)            generic_ldxar_d(r0,r1,r2)
+#  define ldxai_d(r0,r1,i0)            generic_ldxai_d(r0,r1,i0)
 #  define str_f(r0,r1)                 STFSX(r1, _R0_REGNO, r0)
 #  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
 static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
@@ -422,6 +434,18 @@ static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
 static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_f(r0,r1,r2)            _stxbr_f(_jit,r0,r1,r2)
+static void _stxbr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxbi_f(i0,r0,r1)            _stxbi_f(_jit,i0,r0,r1)
+static void _stxbi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxbr_d(r0,r1,r2)            _stxbr_d(_jit,r0,r1,r2)
+static void _stxbr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define stxbi_d(i0,r0,r1)            _stxbi_d(_jit,i0,r0,r1)
+static void _stxbi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  define stxar_f(r0,r1,r2)            generic_stxar_f(r0,r1,r2)
+#  define stxai_f(i0,r0,r1)            generic_stxai_f(i0,r0,r1)
+#  define stxar_d(r0,r1,r2)            generic_stxar_d(r0,r1,r2)
+#  define stxai_d(i0,r0,r1)            generic_stxai_d(i0,r0,r1)
 #endif
 
 #if CODE
@@ -1156,6 +1180,56 @@ _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
 }
 
+static void            /* ldxbr_f: register-offset single-float load, emitted as one LFSUX */
+_ldxbr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* kept as in the integer loads; NOTE(review): r0 is presumably an FPR number here */
+    LFSUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_f: immediate-offset single-float load (LFSU, or ldxbr_f fallback) */
+_ldxbi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to LFSU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* kept as in the integer loads */
+       LFSU(r0, r1, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_f(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* ldxbr_d: register-offset double-float load, emitted as one LFDUX */
+_ldxbr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    assert(r0 != r1);          /* kept as in the integer loads; NOTE(review): r0 is presumably an FPR number here */
+    LFDUX(r0, r1, r2);
+}
+
+static void            /* ldxbi_d: immediate-offset double-float load (LFDU, or ldxbr_d fallback) */
+_ldxbi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to LFDU */
+       assert(r1 != _R0_REGNO);        /* r1 (presumably the written-back base) must not be R0 */
+       assert(r0 != r1);               /* kept as in the integer loads */
+       LFDU(r0, r1, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       ldxbr_d(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
 static void
 _sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -1291,4 +1365,50 @@ _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        jit_unget_reg(reg);
     }
 }
+
+static void            /* stxbr_f: register-offset single-float store, emitted as one STFSUX */
+_stxbr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    STFSUX(r2, r1, r0);                /* note the reversed order: r2 first, then r1, then r0 */
+}
+
+static void            /* stxbi_f: immediate-offset single-float store (STFSU, or stxbr_f for large i0) */
+_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to STFSU */
+       assert(r0 != _R0_REGNO);        /* r0 is the base STFSU updates; r1 is the stored float register */
+       STFSU(r1, r0, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_f(rn(reg), r0, r1);       /* (offset, base, value): same roles as the STFSU path above */
+       jit_unget_reg(reg);
+    }
+}
+
+static void            /* stxbr_d: register-offset double-float store, emitted as one STFDUX */
+_stxbr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    assert(r1 != _R0_REGNO);   /* r1 (presumably the written-back base) must not be R0 */
+    STFDUX(r2, r1, r0);                /* note the reversed order: r2 first, then r1, then r0 */
+}
+
+static void            /* stxbi_d: immediate-offset double-float store (STFDU, or stxbr_d for large i0) */
+_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    if (can_sign_extend_short_p(i0)) { /* i0 can be passed straight to STFDU */
+       assert(r0 != _R0_REGNO);        /* r0 is the base STFDU updates; r1 is the stored float register */
+       STFDU(r1, r0, i0);
+    }
+    else {                             /* otherwise load i0 into a scratch GPR and use the register form */
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       stxbr_d(rn(reg), r0, r1);       /* (offset, base, value): same roles as the STFDU path above */
+       jit_unget_reg(reg);
+    }
+}
 #endif
index 136f1d4..c275d28 100644 (file)
     12, /* hmuli */
     4, /* hmulr_u */
     12, /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    12, /* ldxar_c */
+    12, /* ldxai_c */
+    4, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    4, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    4, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    4, /* ldxbr_i */
+    4, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    4, /* ldxbr_f */
+    4, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    4, /* ldxbr_d */
+    4, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    4, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    4, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    4, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    4, /* stxbr_f */
+    4, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    4, /* stxbr_d */
+    4, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* !_CALL_SYSV */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
     12, /* hmuli */
     4, /* hmulr_u */
     12, /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    12, /* ldxar_c */
+    12, /* ldxai_c */
+    4, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    4, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    4, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    4, /* ldxbr_i */
+    4, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    4, /* ldxbr_f */
+    4, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    4, /* ldxbr_d */
+    4, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    4, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    4, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    4, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    4, /* stxbr_f */
+    4, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    4, /* stxbr_d */
+    4, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* _CALL_SYSV */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
     24,        /* hmuli */
     4, /* hmulr_u */
     24,        /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    12, /* ldxar_c */
+    12, /* ldxai_c */
+    4, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    4, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    4, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    4, /* ldxbr_ui */
+    4, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    8, /* ldxai_ui */
+    4, /* ldxbr_l */
+    4, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    4, /* ldxbr_f */
+    4, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    4, /* ldxbr_d */
+    4, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    4, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    4, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    4, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    4, /* stxbr_l */
+    4, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    4, /* stxbr_f */
+    4, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    4, /* stxbr_d */
+    4, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
     24,        /* hmuli */
     4, /* hmulr_u */
     24,        /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    12, /* ldxar_c */
+    12, /* ldxai_c */
+    4, /* ldxbr_uc */
+    4, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    4, /* ldxbr_s */
+    4, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    4, /* ldxbr_us */
+    4, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    4, /* ldxbr_ui */
+    4, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    8, /* ldxai_ui */
+    4, /* ldxbr_l */
+    4, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    4, /* ldxbr_f */
+    4, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    4, /* ldxbr_d */
+    4, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    4, /* stxbr_c */
+    4, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    4, /* stxbr_s */
+    4, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    4, /* stxbr_i */
+    4, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    4, /* stxbr_l */
+    4, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    4, /* stxbr_f */
+    4, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    4, /* stxbr_d */
+    4, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __BYTE_ORDER */
 #endif /* __powerpc__ */
 #endif /* __WORDSIZE */
index 9f98176..3d506ba 100644 (file)
  * Authors:
  *     Paulo Cesar Pereira de Andrade
  */
+#define CHECK_POPCNTB  0
+
+#if CHECK_POPCNTB
+#include <signal.h>
+#include <setjmp.h>
+#endif
 
 #define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
 #if !_CALL_SYSV
@@ -112,6 +118,7 @@ extern void __clear_cache(void *, void *);
 /*
  * Initialization
  */
+jit_cpu_t              jit_cpu;
 jit_register_t         _rvs[] = {
     { rc(sav) | 0,                     "r0" },
     { rc(sav) | 11,                    "r11" },        /* env */
@@ -187,6 +194,9 @@ jit_register_t              _rvs[] = {
     { rc(arg) | rc(fpr) | 1,           "f1" },
     { _NOREG,                          "<none>" },
 };
+#if CHECK_POPCNTB
+static sigjmp_buf      jit_env;
+#endif
 
 static jit_int32_t iregs[] = {
     _R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22,
@@ -200,9 +210,57 @@ static jit_int32_t fregs[] = {
 /*
  * Implementation
  */
+#if CHECK_POPCNTB
+static void            /* SIGILL handler for the popcntb probe in jit_get_cpu() */
+sigill_handler(int signum)
+{
+    jit_cpu.popcntb = 0;       /* the probe instruction trapped: mark popcntb as unavailable */
+    siglongjmp(jit_env, 1);    /* resume at the sigsetjmp(jit_env, ...) point instead of returning */
+}
+#endif
+
 void
 jit_get_cpu(void)
 {
+#if CHECK_POPCNTB
+    long               r12;
+    struct             sigaction new_action, old_action;
+    new_action.sa_handler = sigill_handler;
+    sigemptyset(&new_action.sa_mask);
+    new_action.sa_flags = 0;
+    sigaction(SIGILL, NULL, &old_action);
+    if (old_action.sa_handler != SIG_IGN) {
+       sigaction(SIGILL, &new_action, NULL);
+       if (!sigsetjmp(jit_env, 1)) {
+           jit_cpu.popcntb = 1;
+           /* popcntb %r12, %r12 */
+           __asm__ volatile("mr %%r12, %0;"
+                            "popcntb %%r12, %%r12;"
+                            "mr %0, %%r12;"
+                            : "=r" (r12), "=r" (r12));
+           sigaction(SIGILL, &old_action, NULL);
+       }
+    }
+#elif defined(__linux__)
+    FILE       *fp;
+    char       *ptr;
+    long        vers;
+    char        buf[128];
+
+    if ((fp = fopen("/proc/cpuinfo", "r")) != NULL) {
+       while (fgets(buf, sizeof(buf), fp)) {
+           if (strncmp(buf, "cpu\t\t: POWER", 12) == 0) {
+               vers = strtol(buf + 12, &ptr, 10);
+               jit_cpu.popcntb = vers > 5;
+               break;
+           }
+       }
+       fclose(fp);
+    }
+#else
+    /* By default, assume it is not available */
+    jit_cpu.popcntb = 0;
+#endif
 }
 
 void
@@ -1262,6 +1320,24 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w),             \
+                             rn(node->w.w));                           \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1604,6 +1680,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+#if __WORDSIZE == 64
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+#endif
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rrr(stx, _c);
@@ -1630,6 +1726,20 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+#if __WORDSIZE == 64
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+#endif
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(mov, _f);
            case jit_code_movi_f:
                assert(node->flag & jit_flag_data);
@@ -2167,6 +2277,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_wrr
 #undef case_rrf
 #undef case_rrw
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index c08e5bd..5aa243e 100644 (file)
     16, /* hmuli */
     4, /* hmulr_u */
     16, /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    8, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    8, /* ldxbr_ui */
+    8, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    8, /* ldxai_ui */
+    8, /* ldxbr_l */
+    8, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    8, /* stxbr_l */
+    8, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index 27b0c5a..c787efb 100644 (file)
@@ -1015,6 +1015,26 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1263,6 +1283,24 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1285,6 +1323,18 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
                case_rr(hton, _ul);
@@ -1877,6 +1927,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_brr
 #undef case_wrr
 #undef case_rrw
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
index b8a87e8..498a49a 100644 (file)
@@ -1,5 +1,5 @@
 #if __WORDSIZE == 32
-#define JIT_INSTR_MAX 630
+#define JIT_INSTR_MAX 200
     0, /* data */
     0, /* live */
     4, /* align */
     12,        /* qlshi */
     66,        /* qlshr_u */
     12,        /* qlshi_u */
-    70,        /* qrshr */
+    68,        /* qrshr */
     12,        /* qrshi */
     66,        /* qrshr_u */
     12,        /* qrshi_u */
     38,        /* unldi */
     86,        /* unldr_u */
     38,        /* unldi_u */
-    238,       /* unstr */
-    100,       /* unsti */
+    84,        /* unstr */
+    42,        /* unsti */
     200,       /* unldr_x */
     86,        /* unldi_x */
-    630,       /* unstr_x */
-    294,       /* unsti_x */
+    194,       /* unstr_x */
+    102,       /* unsti_x */
     8, /* fmar_f */
     0, /* fmai_f */
     8, /* fmsr_f */
     0, /* fnmai_d */
     10,        /* fnmsr_d */
     0, /* fnmsi_d */
-    34, /* hmulr */
-    42, /* hmuli */
+    34,        /* hmulr */
+    42,        /* hmuli */
     8, /* hmulr_u */
-    16, /* hmuli_u */
+    16,        /* hmuli_u */
+    8, /* ldxbr_c */
+    10,        /* ldxbi_c */
+    8, /* ldxar_c */
+    10,        /* ldxai_c */
+    8, /* ldxbr_uc */
+    10,        /* ldxbi_uc */
+    8, /* ldxar_uc */
+    10,        /* ldxai_uc */
+    6, /* ldxbr_s */
+    8, /* ldxbi_s */
+    6, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    10,        /* ldxbi_us */
+    8, /* ldxar_us */
+    10,        /* ldxai_us */
+    8, /* ldxbr_i */
+    10,        /* ldxbi_i */
+    8, /* ldxar_i */
+    10,        /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    6, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    6, /* ldxbr_d */
+    8, /* ldxbi_d */
+    6, /* ldxar_d */
+    8, /* ldxai_d */
+    6, /* stxbr_c */
+    8, /* stxbi_c */
+    6, /* stxar_c */
+    8, /* stxai_c */
+    6, /* stxbr_s */
+    8, /* stxbi_s */
+    6, /* stxar_s */
+    8, /* stxai_s */
+    6, /* stxbr_i */
+    8, /* stxbi_i */
+    6, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    6, /* stxbr_f */
+    8, /* stxbi_f */
+    6, /* stxar_f */
+    8, /* stxai_f */
+    6, /* stxbr_d */
+    8, /* stxbi_d */
+    6, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
-#define JIT_INSTR_MAX 364
+#define JIT_INSTR_MAX 264
     0, /* data */
     0, /* live */
     20,        /* align */
     12,        /* qlshi */
     74,        /* qlshr_u */
     12,        /* qlshi_u */
-    78,        /* qrshr */
+    76,        /* qrshr */
     12,        /* qrshi */
     74,        /* qrshr_u */
     12,        /* qrshi_u */
     58,        /* unldi */
     122,       /* unldr_u */
     58,        /* unldi_u */
-    296,       /* unstr */
-    150,       /* unsti */
+    252,       /* unstr */
+    82,        /* unsti */
     130,       /* unldr_x */
     70,        /* unldi_x */
-    364,       /* unstr_x */
-    194,       /* unsti_x */
+    264,       /* unstr_x */
+    94,        /* unsti_x */
     8, /* fmar_f */
     0, /* fmai_f */
     8, /* fmsr_f */
     0, /* fnmai_d */
     10,        /* fnmsr_d */
     0, /* fnmsi_d */
-    44, /* hmulr */
-    60, /* hmuli */
-    12, /* hmulr_u */
-    28, /* hmuli_u */
+    44,        /* hmulr */
+    60,        /* hmuli */
+    12,        /* hmulr_u */
+    28,        /* hmuli_u */
+    10,        /* ldxbr_c */
+    10,        /* ldxbi_c */
+    10,        /* ldxar_c */
+    10,        /* ldxai_c */
+    10,        /* ldxbr_uc */
+    10,        /* ldxbi_uc */
+    10,        /* ldxar_uc */
+    10,        /* ldxai_uc */
+    10,        /* ldxbr_s */
+    10,        /* ldxbi_s */
+    10,        /* ldxar_s */
+    10,        /* ldxai_s */
+    10,        /* ldxbr_us */
+    10,        /* ldxbi_us */
+    10,        /* ldxar_us */
+    10,        /* ldxai_us */
+    10,        /* ldxbr_i */
+    10,        /* ldxbi_i */
+    10,        /* ldxar_i */
+    10,        /* ldxai_i */
+    10,        /* ldxbr_ui */
+    10,        /* ldxbi_ui */
+    10,        /* ldxar_ui */
+    10,        /* ldxai_ui */
+    10,        /* ldxbr_l */
+    10,        /* ldxbi_l */
+    10,        /* ldxar_l */
+    10,        /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    10,        /* stxbr_l */
+    10,        /* stxbi_l */
+    10,        /* stxar_l */
+    10,        /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index 851d0d0..9a8373f 100644 (file)
@@ -973,6 +973,26 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1231,6 +1251,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);     /* per line: immediate-operand case (case_rrx), then register-operand case (case_rrX) */
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+#if __WORDSIZE == 64   /* the _ui and _l load variants are only compiled for 64-bit words */
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+#endif /* __WORDSIZE == 64 */
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1259,6 +1299,20 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+#if __WORDSIZE == 64
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+#endif
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
 #if __WORDSIZE == 64
@@ -1864,6 +1918,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_brr
 #undef case_wrr
 #undef case_rrw
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrr
 #undef case_wr
 #undef case_rw
diff --git a/deps/lightning/lib/jit_sh-cpu.c b/deps/lightning/lib/jit_sh-cpu.c
new file mode 100644 (file)
index 0000000..cfb8b26
--- /dev/null
@@ -0,0 +1,3209 @@
+/*
+ * Copyright (C) 2022  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paul Cercueil
+ */
+
+#if PROTO
+
+#  ifdef __SH4_SINGLE__        /* compile-time FPU configuration, derived from compiler-predefined macros */
+#    define SH_DEFAULT_FPU_MODE 0      /* NOTE(review): presumably 0 = single-precision default; confirm at the use sites */
+#  else
+#    define SH_DEFAULT_FPU_MODE 1      /* NOTE(review): presumably 1 = double-precision default; confirm at the use sites */
+#  endif
+
+#  ifndef SH_HAS_FPU   /* may be pre-defined by the build; otherwise follows __SH_FPU_ANY__ */
+#    ifdef __SH_FPU_ANY__
+#      define SH_HAS_FPU 1
+#    else
+#      define SH_HAS_FPU 0
+#    endif
+#  endif
+
+#  ifdef __SH4_SINGLE_ONLY__
+#    define SH_SINGLE_ONLY 1   /* set when the compiler defines __SH4_SINGLE_ONLY__ */
+#  else
+#    define SH_SINGLE_ONLY 0
+#  endif
+
+
+struct jit_instr_ni {  /* 16-bit instruction word split into fields c (4 bits), n (4 bits), i (8 bits) */
+#if __BYTE_ORDER == __LITTLE_ENDIAN    /* declaration order is mirrored per byte order; assumes the compiler then places c in the top bits either way -- TODO confirm bit-field layout */
+       jit_uint16_t i :8;
+       jit_uint16_t n :4;
+       jit_uint16_t c :4;
+#else
+       jit_uint16_t c :4;
+       jit_uint16_t n :4;
+       jit_uint16_t i :8;
+#endif
+};
+
+struct jit_instr_nmd { /* 16-bit instruction word split into four 4-bit fields: c, n, m, d */
+#if __BYTE_ORDER == __LITTLE_ENDIAN    /* declaration order is mirrored per byte order; assumes the compiler then places c in the top bits either way -- TODO confirm bit-field layout */
+       jit_uint16_t d :4;
+       jit_uint16_t m :4;
+       jit_uint16_t n :4;
+       jit_uint16_t c :4;
+#else
+       jit_uint16_t c :4;
+       jit_uint16_t n :4;
+       jit_uint16_t m :4;
+       jit_uint16_t d :4;
+#endif
+};
+
+struct jit_instr_md {  /* 16-bit instruction word split into fields c (8 bits), m (4 bits), d (4 bits) */
+#if __BYTE_ORDER == __LITTLE_ENDIAN    /* declaration order is mirrored per byte order; assumes the compiler then places c in the top bits either way -- TODO confirm bit-field layout */
+       jit_uint16_t d :4;
+       jit_uint16_t m :4;
+       jit_uint16_t c :8;
+#else
+       jit_uint16_t c :8;
+       jit_uint16_t m :4;
+       jit_uint16_t d :4;
+#endif
+};
+
+struct jit_instr_d {   /* 16-bit instruction word split into fields c (4 bits) and d (12 bits) */
+#if __BYTE_ORDER == __LITTLE_ENDIAN    /* declaration order is mirrored per byte order; assumes the compiler then places c in the top bits either way -- TODO confirm bit-field layout */
+       jit_uint16_t d :12;
+       jit_uint16_t c :4;
+#else
+       jit_uint16_t c :4;
+       jit_uint16_t d :12;
+#endif
+};
+
+typedef union {        /* one 16-bit instruction, viewable through any of the field layouts below */
+    struct jit_instr_ni ni;    /* c / n / i(8) layout */
+    struct jit_instr_nmd nmd;  /* c / n / m / d layout */
+    struct jit_instr_md md;    /* c(8) / m / d layout */
+    struct jit_instr_d d;      /* c / d(12) layout */
+    jit_uint16_t op;           /* the same storage as a single 16-bit value */
+} jit_instr_t;
+
+static void _cni(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);        /* NOTE(review): presumably emits a jit_instr_t.ni word from (c, n, i); body is not in this chunk */
+static void
+_cnmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_uint16_t);      /* NOTE(review): presumably emits a jit_instr_t.nmd word from (c, n, m, d) */
+static void _cmd(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);        /* NOTE(review): presumably emits a jit_instr_t.md word from (c, m, d) */
+static void _cd(jit_state_t*,jit_uint16_t,jit_uint16_t);      /* NOTE(review): presumably emits a jit_instr_t.d word from (c, d) */
+
+#    define STRB(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0x4) /* mov.b Rm,@(R0,Rn) -- NOTE(review): mnemonics in this group are the reviewer's reading of the encodings; confirm against the SH-4 manual */
+#    define STRW(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0x5) /* mov.w Rm,@(R0,Rn) */
+#    define STRL(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0x6) /* mov.l Rm,@(R0,Rn) */
+#    define MULL(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0x7) /* mul.l Rm,Rn */
+#    define LDRB(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0xc) /* mov.b @(R0,Rm),Rn */
+#    define LDRW(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0xd) /* mov.w @(R0,Rm),Rn */
+#    define LDRL(rn, rm)               _cnmd(_jit, 0x0, rn, rm, 0xe) /* mov.l @(R0,Rm),Rn */
+#    define BSRF(rn)                   _cni(_jit, 0x0, rn, 0x03) /* bsrf Rn */
+#    define STCGBR(rn)                 _cni(_jit, 0x0, rn, 0x12) /* stc GBR,Rn */
+#    define STSH(rn)                   _cni(_jit, 0x0, rn, 0x0a) /* sts MACH,Rn */
+#    define STSL(rn)                   _cni(_jit, 0x0, rn, 0x1a) /* sts MACL,Rn */
+#    define BRAF(rn)                   _cni(_jit, 0x0, rn, 0x23) /* braf Rn */
+#    define MOVT(rn)                   _cni(_jit, 0x0, rn, 0x29) /* movt Rn */
+
+#    define STSPR(rn)                  _cni(_jit, 0x0, rn, 0x2a) /* sts PR,Rn */
+#    define STSUL(rn)                  _cni(_jit, 0x0, rn, 0x5a) /* sts FPUL,Rn */
+#    define STSFP(rn)                  _cni(_jit, 0x0, rn, 0x6a) /* sts FPSCR,Rn */
+
+#    define STDL(rn, rm, imm)          _cnmd(_jit, 0x1, rn, rm, imm) /* mov.l Rm,@(disp,Rn); imm goes in the 4-bit d field */
+
+#    define STB(rn, rm)                        _cnmd(_jit, 0x2, rn, rm, 0x0) /* mov.b Rm,@Rn */
+#    define STW(rn, rm)                        _cnmd(_jit, 0x2, rn, rm, 0x1) /* mov.w Rm,@Rn */
+#    define STL(rn, rm)                        _cnmd(_jit, 0x2, rn, rm, 0x2) /* mov.l Rm,@Rn */
+#    define STBU(rn, rm)               _cnmd(_jit, 0x2, rn, rm, 0x4) /* mov.b Rm,@-Rn (pre-decrement) */
+#    define STWU(rn, rm)               _cnmd(_jit, 0x2, rn, rm, 0x5) /* mov.w Rm,@-Rn */
+#    define STLU(rn, rm)               _cnmd(_jit, 0x2, rn, rm, 0x6) /* mov.l Rm,@-Rn */
+#    define DIV0S(rn, rm)              _cnmd(_jit, 0x2, rn, rm, 0x7) /* div0s Rm,Rn */
+#    define TST(rn, rm)                        _cnmd(_jit, 0x2, rn, rm, 0x8) /* tst Rm,Rn */
+#    define AND(rn, rm)                        _cnmd(_jit, 0x2, rn, rm, 0x9) /* and Rm,Rn */
+#    define XOR(rn, rm)                        _cnmd(_jit, 0x2, rn, rm, 0xa) /* xor Rm,Rn */
+#    define OR(rn, rm)                 _cnmd(_jit, 0x2, rn, rm, 0xb) /* or Rm,Rn */
+
+#    define CMPEQ(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0x0) /* cmp/eq Rm,Rn -- NOTE(review): mnemonics in this group are the reviewer's reading of the encodings; confirm against the SH-4 manual */
+#    define CMPHS(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0x2) /* cmp/hs Rm,Rn (unsigned >=) */
+#    define CMPGE(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0x3) /* cmp/ge Rm,Rn (signed >=) */
+#    define DIV1(rn, rm)               _cnmd(_jit, 0x3, rn, rm, 0x4) /* div1 Rm,Rn */
+#    define DMULU(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0x5) /* dmulu.l Rm,Rn */
+#    define CMPHI(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0x6) /* cmp/hi Rm,Rn (unsigned >) */
+#    define CMPGT(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0x7) /* cmp/gt Rm,Rn (signed >) */
+#    define SUB(rn, rm)                        _cnmd(_jit, 0x3, rn, rm, 0x8) /* sub Rm,Rn */
+#    define SUBC(rn, rm)               _cnmd(_jit, 0x3, rn, rm, 0xa) /* subc Rm,Rn */
+#    define SUBV(rn, rm)               _cnmd(_jit, 0x3, rn, rm, 0xb) /* subv Rm,Rn */
+#    define ADD(rn, rm)                        _cnmd(_jit, 0x3, rn, rm, 0xc) /* add Rm,Rn */
+#    define ADDC(rn, rm)               _cnmd(_jit, 0x3, rn, rm, 0xe) /* addc Rm,Rn */
+#    define ADDV(rn, rm)               _cnmd(_jit, 0x3, rn, rm, 0xf) /* addv Rm,Rn */
+#    define DMULS(rn, rm)              _cnmd(_jit, 0x3, rn, rm, 0xd) /* dmuls.l Rm,Rn */
+
+#    define SHLL(rn)                   _cni(_jit, 0x4, rn, 0x00) /* shll Rn -- NOTE(review): mnemonics in this group are the reviewer's reading of the encodings; confirm against the SH-4 manual */
+#    define SHLR(rn)                   _cni(_jit, 0x4, rn, 0x01) /* shlr Rn */
+#    define ROTL(rn)                   _cni(_jit, 0x4, rn, 0x04) /* rotl Rn */
+#    define ROTR(rn)                   _cni(_jit, 0x4, rn, 0x05) /* rotr Rn */
+#    define SHLL2(rn)                  _cni(_jit, 0x4, rn, 0x08) /* shll2 Rn */
+#    define SHLR2(rn)                  _cni(_jit, 0x4, rn, 0x09) /* shlr2 Rn */
+#    define JSR(rn)                    _cni(_jit, 0x4, rn, 0x0b) /* jsr @Rn */
+#    define DT(rn)                     _cni(_jit, 0x4, rn, 0x10) /* dt Rn */
+#    define CMPPZ(rn)                  _cni(_jit, 0x4, rn, 0x11) /* cmp/pz Rn */
+#    define CMPPL(rn)                  _cni(_jit, 0x4, rn, 0x15) /* cmp/pl Rn */
+#    define SHLL8(rn)                  _cni(_jit, 0x4, rn, 0x18) /* shll8 Rn */
+#    define SHLR8(rn)                  _cni(_jit, 0x4, rn, 0x19) /* shlr8 Rn */
+#    define TAS(rn)                    _cni(_jit, 0x4, rn, 0x1b) /* tas.b @Rn */
+#    define LDCGBR(rm)                 _cni(_jit, 0x4, rm, 0x1e) /* ldc Rm,GBR */
+#    define SHAL(rn)                   _cni(_jit, 0x4, rn, 0x20) /* shal Rn */
+#    define SHAR(rn)                   _cni(_jit, 0x4, rn, 0x21) /* shar Rn */
+#    define ROTCL(rn)                  _cni(_jit, 0x4, rn, 0x24) /* rotcl Rn */
+#    define ROTCR(rn)                  _cni(_jit, 0x4, rn, 0x25) /* rotcr Rn */
+#    define SHLL16(rn)                 _cni(_jit, 0x4, rn, 0x28) /* shll16 Rn */
+#    define SHLR16(rn)                 _cni(_jit, 0x4, rn, 0x29) /* shlr16 Rn */
+#    define LDSPR(rn)                  _cni(_jit, 0x4, rn, 0x2a) /* lds Rm,PR */
+#    define JMP(rn)                    _cni(_jit, 0x4, rn, 0x2b) /* jmp @Rn */
+#    define LDS(rn)                    _cni(_jit, 0x4, rn, 0x5a) /* lds Rm,FPUL */
+#    define LDSFP(rn)                  _cni(_jit, 0x4, rn, 0x6a) /* lds Rm,FPSCR */
+#    define SHAD(rn, rm)               _cnmd(_jit, 0x4, rn, rm, 0xc) /* shad Rm,Rn */
+#    define SHLD(rn, rm)               _cnmd(_jit, 0x4, rn, rm, 0xd) /* shld Rm,Rn */
+
+#    define LDDL(rn, rm, imm)          _cnmd(_jit, 0x5, rn, rm, imm) /* mov.l @(disp,Rm),Rn; imm goes in the 4-bit d field */
+
+#    define LDB(rn, rm)                        _cnmd(_jit, 0x6, rn, rm, 0x0) /* mov.b @Rm,Rn -- NOTE(review): mnemonics in this group are the reviewer's reading of the encodings; confirm against the SH-4 manual */
+#    define LDW(rn, rm)                        _cnmd(_jit, 0x6, rn, rm, 0x1) /* mov.w @Rm,Rn */
+#    define LDL(rn, rm)                        _cnmd(_jit, 0x6, rn, rm, 0x2) /* mov.l @Rm,Rn */
+#    define MOV(rn, rm)                        _cnmd(_jit, 0x6, rn, rm, 0x3) /* mov Rm,Rn */
+#    define LDBU(rn, rm)               _cnmd(_jit, 0x6, rn, rm, 0x4) /* mov.b @Rm+,Rn (post-increment) */
+#    define LDWU(rn, rm)               _cnmd(_jit, 0x6, rn, rm, 0x5) /* mov.w @Rm+,Rn */
+#    define LDLU(rn, rm)               _cnmd(_jit, 0x6, rn, rm, 0x6) /* mov.l @Rm+,Rn */
+#    define NOT(rn, rm)                        _cnmd(_jit, 0x6, rn, rm, 0x7) /* not Rm,Rn */
+#    define SWAPB(rn, rm)              _cnmd(_jit, 0x6, rn, rm, 0x8) /* swap.b Rm,Rn */
+#    define SWAPW(rn, rm)              _cnmd(_jit, 0x6, rn, rm, 0x9) /* swap.w Rm,Rn */
+#    define NEGC(rn, rm)               _cnmd(_jit, 0x6, rn, rm, 0xa) /* negc Rm,Rn */
+#    define NEG(rn, rm)                        _cnmd(_jit, 0x6, rn, rm, 0xb) /* neg Rm,Rn */
+#    define EXTUB(rn, rm)              _cnmd(_jit, 0x6, rn, rm, 0xc) /* extu.b Rm,Rn */
+#    define EXTUW(rn, rm)              _cnmd(_jit, 0x6, rn, rm, 0xd) /* extu.w Rm,Rn */
+#    define EXTSB(rn, rm)              _cnmd(_jit, 0x6, rn, rm, 0xe) /* exts.b Rm,Rn */
+#    define EXTSW(rn, rm)              _cnmd(_jit, 0x6, rn, rm, 0xf) /* exts.w Rm,Rn */
+
+#    define ADDI(rn, imm)              _cni(_jit, 0x7, rn, imm) /* add #imm,Rn; imm goes in the 8-bit i field */
+
+#    define LDDB(rm, imm)              _cnmd(_jit, 0x8, 0x4, rm, imm) /* mov.b @(disp,Rm),R0 -- NOTE(review): mnemonics in this group are the reviewer's reading of the encodings; confirm against the SH-4 manual */
+#    define LDDW(rm, imm)              _cnmd(_jit, 0x8, 0x5, rm, imm) /* mov.w @(disp,Rm),R0 */
+#    define CMPEQI(imm)                        _cni(_jit, 0x8, 0x8, imm) /* cmp/eq #imm,R0 */
+#    define BT(imm)                    _cni(_jit, 0x8, 0x9, imm) /* bt disp */
+#    define BF(imm)                    _cni(_jit, 0x8, 0xb, imm) /* bf disp */
+#    define BTS(imm)                   _cni(_jit, 0x8, 0xd, imm) /* bt/s disp (delayed) */
+#    define BFS(imm)                   _cni(_jit, 0x8, 0xf, imm) /* bf/s disp (delayed) */
+
+#    define LDPW(rn, imm)              _cni(_jit, 0x9, rn, imm) /* mov.w @(disp,PC),Rn */
+
+#    define BRA(imm)                   _cd(_jit, 0xa, imm) /* bra disp (12-bit d field) */
+
+#    define BSR(imm)                   _cd(_jit, 0xb, imm) /* bsr disp (12-bit d field) */
+
+#    define GBRSTB(imm)                        _cni(_jit, 0xc, 0x0, imm) /* mov.b R0,@(disp,GBR) */
+#    define GBRSTW(imm)                        _cni(_jit, 0xc, 0x1, imm) /* mov.w R0,@(disp,GBR) */
+#    define GBRSTL(imm)                        _cni(_jit, 0xc, 0x2, imm) /* mov.l R0,@(disp,GBR) */
+#    define GBRLDB(imm)                        _cni(_jit, 0xc, 0x4, imm) /* mov.b @(disp,GBR),R0 */
+#    define GBRLDW(imm)                        _cni(_jit, 0xc, 0x5, imm) /* mov.w @(disp,GBR),R0 */
+#    define GBRLDL(imm)                        _cni(_jit, 0xc, 0x6, imm) /* mov.l @(disp,GBR),R0 */
+#    define MOVA(imm)                  _cni(_jit, 0xc, 0x7, imm) /* mova @(disp,PC),R0 */
+#    define TSTI(imm)                  _cni(_jit, 0xc, 0x8, imm) /* tst #imm,R0 */
+#    define ANDI(imm)                  _cni(_jit, 0xc, 0x9, imm) /* and #imm,R0 */
+#    define XORI(imm)                  _cni(_jit, 0xc, 0xa, imm) /* xor #imm,R0 */
+#    define ORI(imm)                   _cni(_jit, 0xc, 0xb, imm) /* or #imm,R0 */
+
+#    define LDPL(rn, imm)              _cni(_jit, 0xd, rn, imm) /* mov.l @(disp,PC),Rn */
+
+#    define MOVI(rn, imm)              _cni(_jit, 0xe, rn, imm) /* mov #imm,Rn */
+
+#    define FADD(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x0) /* fadd FRm,FRn -- NOTE(review): mnemonics in this group are the reviewer's reading of the encodings; confirm against the SH-4 manual */
+#    define FSUB(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x1) /* fsub FRm,FRn */
+#    define FMUL(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x2) /* fmul FRm,FRn */
+#    define FDIV(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x3) /* fdiv FRm,FRn */
+#    define FCMPEQ(rn,rm)              _cnmd(_jit, 0xf, rn, rm, 0x4) /* fcmp/eq FRm,FRn */
+#    define FCMPGT(rn,rm)              _cnmd(_jit, 0xf, rn, rm, 0x5) /* fcmp/gt FRm,FRn */
+#    define LDXF(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x6) /* fmov.s @(R0,Rm),FRn */
+#    define STXF(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x7) /* fmov.s FRm,@(R0,Rn) */
+#    define LDF(rn, rm)                        _cnmd(_jit, 0xf, rn, rm, 0x8) /* fmov.s @Rm,FRn */
+#    define LDFS(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0x9) /* fmov.s @Rm+,FRn (post-increment) */
+#    define STF(rn, rm)                        _cnmd(_jit, 0xf, rn, rm, 0xa) /* fmov.s FRm,@Rn */
+#    define STFS(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0xb) /* fmov.s FRm,@-Rn (pre-decrement) */
+#    define FMOV(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0xc) /* fmov FRm,FRn */
+#    define FMAC(rn, rm)               _cnmd(_jit, 0xf, rn, rm, 0xe) /* fmac FR0,FRm,FRn */
+#    define FSTS(rn)                   _cni(_jit, 0xf, rn, 0x0d) /* fsts FPUL,FRn */
+#    define FLDS(rn)                   _cni(_jit, 0xf, rn, 0x1d) /* flds FRm,FPUL */
+#    define FLOAT(rn)                  _cni(_jit, 0xf, rn, 0x2d) /* float FPUL,FRn */
+#    define FTRC(rn)                   _cni(_jit, 0xf, rn, 0x3d) /* ftrc FRm,FPUL */
+#    define FNEG(rn)                   _cni(_jit, 0xf, rn, 0x4d) /* fneg FRn */
+#    define FABS(rn)                   _cni(_jit, 0xf, rn, 0x5d) /* fabs FRn */
+#    define FSQRT(rn)                  _cni(_jit, 0xf, rn, 0x6d) /* fsqrt FRn */
+#    define FLDI0(rn)                  _cni(_jit, 0xf, rn, 0x8d) /* fldi0 FRn */
+#    define FLDI1(rn)                  _cni(_jit, 0xf, rn, 0x9d) /* fldi1 FRn */
+#    define FCNVSD(rn)                 _cni(_jit, 0xf, rn, 0xad) /* fcnvsd FPUL,DRn */
+#    define FCNVDS(rn)                 _cni(_jit, 0xf, rn, 0xbd) /* fcnvds DRm,FPUL */
+
+#    define FMOVXX(rn, rm)             FMOV((rn) | 1, (rm) | 1) /* FMOV with bit 0 set in both register numbers; presumably selects the XD bank in pair mode -- confirm */
+#    define FMOVDX(rn, rm)             FMOV((rn) | 0, (rm) | 1) /* FMOV with bit 0 set in the second (rm) register number only */
+#    define FMOVXD(rn, rm)             FMOV((rn) | 1, (rm) | 0) /* FMOV with bit 0 set in the first (rn) register number only */
+
+#    define CLRT()                     ii(0x8) /* operand-less instructions: the full 16-bit opcode is written directly via ii() */
+#    define NOP()                      ii(0x9)
+#    define RTS()                      ii(0xb)
+#    define SETT()                     ii(0x18)
+#    define DIV0U()                    ii(0x19)
+#    define FSCHG()                    ii(0xf3fd)
+#    define FRCHG()                    ii(0xfbfd)
+
+#    define ii(i)                      (*_jit->pc.us++ = (i)) /* store one 16-bit word at pc.us and advance it; parenthesised so it composes safely inside larger expressions */
+
+#    define stack_framesize            ((JIT_V_NUM + 2) * 4) /* (JIT_V_NUM + 2) slots of 4 bytes; presumably the saved JIT_V registers plus two extra words -- confirm in the prolog */
+
+#    define PR_FLAG                    (1 << 19) /* bit 19; NOTE(review): matches FPSCR.PR (precision) in the SH-4 manual -- confirm */
+#    define SZ_FLAG                    (1 << 20) /* bit 20; NOTE(review): matches FPSCR.SZ (transfer size) -- confirm */
+#    define FR_FLAG                    (1 << 21) /* bit 21; NOTE(review): matches FPSCR.FR (register bank) -- confirm */
+
+static void _nop(jit_state_t*,jit_word_t);    /* each _name() prototype is paired with a name() macro that passes _jit implicitly */
+#    define nop(i0)                    _nop(_jit,i0)
+static void _movr(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#    define movr(r0,r1)                        _movr(_jit,r0,r1)
+static void _movi(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define movi(r0,i0)                        _movi(_jit,r0,i0)
+static void _movnr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t);
+#    define movnr(r0,r1,r2)            _movnr(_jit,r0,r1,r2,1) /* movnr and movzr share _movnr; the final flag is 1 here */
+#    define movzr(r0,r1,r2)            _movnr(_jit,r0,r1,r2,0) /* ...and 0 here */
+#    define casx(r0,r1,r2,r3,i0)       _casx(_jit,r0,r1,r2,r3,i0)
+static void _casx(jit_state_t *_jit,jit_int32_t,jit_int32_t,
+                 jit_int32_t,jit_int32_t,jit_word_t);
+#    define casr(r0,r1,r2,r3)          casx(r0,r1,r2,r3,0) /* register form: the immediate slot is passed as 0 */
+#    define casi(r0,i0,r1,r2)          casx(r0,_NOREG,r1,r2,i0) /* immediate form: the second register slot is _NOREG */
+static void _addr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);       /* add/sub family: *r variants take three 16-bit register numbers, *i variants take a jit_word_t immediate last */
+#    define addr(r0,r1,r2)             _addr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define addcr(r0,r1,r2)            _addcr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define addxr(r0,r1,r2)            _addxr(_jit,r0,r1,r2)
+static void _addi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define addi(r0,r1,i0)             _addi(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define addci(r0,r1,i0)            _addci(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define addxi(r0,r1,i0)            _addxi(_jit,r0,r1,i0)
+static void _subr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define subr(r0,r1,r2)             _subr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define subcr(r0,r1,r2)            _subcr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define subxr(r0,r1,r2)            _subxr(_jit,r0,r1,r2)
+static void _subi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define subi(r0,r1,i0)             _subi(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define subci(r0,r1,i0)            _subci(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define subxi(r0,r1,i0)            _subxi(_jit,r0,r1,i0)
+static void _rsbi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define rsbi(r0,r1,i0)             _rsbi(_jit,r0,r1,i0)
+static void _mulr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);       /* multiply family; the q* variants take two destination registers (r0, r1) */
+#    define mulr(r0,r1,r2)             _mulr(_jit,r0,r1,r2)
+static void _hmulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); /* note: the hmul* prototypes use jit_int32_t register arguments, unlike their jit_uint16_t neighbours */
+#  define hmulr(r0,r1,r2)              _hmulr(_jit,r0,r1,r2)
+static void _hmuli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define hmuli(r0,r1,i0)              _hmuli(_jit,r0,r1,i0)
+static void _hmulr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define hmulr_u(r0,r1,r2)            _hmulr_u(_jit,r0,r1,r2)
+static void _hmuli_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  define hmuli_u(r0,r1,i0)            _hmuli_u(_jit,r0,r1,i0)
+static void _qmulr(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                  jit_uint16_t,jit_uint16_t);
+#    define qmulr(r0,r1,r2,r3)         _qmulr(_jit,r0,r1,r2,r3)
+static void _qmulr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_uint16_t);
+#    define qmulr_u(r0,r1,r2,r3)       _qmulr_u(_jit,r0,r1,r2,r3)
+static void _muli(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define muli(r0,r1,i0)             _muli(_jit,r0,r1,i0)
+static void _qmuli(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                  jit_uint16_t,jit_word_t);
+#    define qmuli(r0,r1,r2,i0)         _qmuli(_jit,r0,r1,r2,i0)
+static void _qmuli_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_word_t);
+#    define qmuli_u(r0,r1,r2,i0)       _qmuli_u(_jit,r0,r1,r2,i0)
+static void _divr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);       /* divide/remainder family; the q* variants take two destination registers (r0, r1) */
+#    define divr(r0,r1,r2)             _divr(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define divr_u(r0,r1,r2)           _divr_u(_jit,r0,r1,r2)
+static void _qdivr(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                  jit_uint16_t,jit_uint16_t);
+#    define qdivr(r0,r1,r2,r3)         _qdivr(_jit,r0,r1,r2,r3)
+static void _qdivr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_uint16_t);
+#    define qdivr_u(r0,r1,r2,r3)       _qdivr_u(_jit,r0,r1,r2,r3)
+static void _divi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define divi(r0,r1,i0)             _divi(_jit,r0,r1,i0)
+#    define divi_u(r0,r1,i0)           fallback_divi_u(r0,r1,i0) /* no SH-specific routine: forwards to fallback_divi_u */
+static void _qdivi(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                  jit_uint16_t,jit_word_t);
+#    define qdivi(r0,r1,r2,i0)         _qdivi(_jit,r0,r1,r2,i0)
+static void _qdivi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_word_t);
+#    define qdivi_u(r0,r1,r2,i0)       _qdivi_u(_jit,r0,r1,r2,i0)
+static void _remr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define remr(r0,r1,r2)             _remr(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define remr_u(r0,r1,r2)           _remr_u(_jit,r0,r1,r2)
+static void _remi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define remi(r0,r1,i0)             _remi(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define remi_u(r0,r1,i0)           _remi_u(_jit,r0,r1,i0)
+#    define bswapr_us(r0,r1)           _bswapr_us(_jit,r0,r1) /* byte-swap helpers: the macro precedes its prototype here, which is fine since macros expand at use */
+static void _bswapr_us(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#    define bswapr_ui(r0,r1)           _bswapr_ui(_jit,r0,r1)
+static void _bswapr_ui(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#define extr(r0,r1,i0,i1)              fallback_ext(r0,r1,i0,i1) /* bit-field extract/deposit forward to the generic fallback_* routines */
+#define extr_u(r0,r1,i0,i1)            fallback_ext_u(r0,r1,i0,i1)
+#define depr(r0,r1,i0,i1)              fallback_dep(r0,r1,i0,i1)
+#    define extr_c(r0, r1)             EXTSB(r0,r1) /* the four width extensions each map to one EXT* opcode macro */
+#    define extr_s(r0,r1)              EXTSW(r0,r1)
+#    define extr_uc(r0,r1)             EXTUB(r0,r1)
+#    define extr_us(r0,r1)             EXTUW(r0,r1)
+static void _lrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t); /* rotate family: register-count forms have their own routines */
+#    define lrotr(r0,r1,r2)            _lrotr(_jit,r0,r1,r2)
+static void _rrotr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#    define rrotr(r0,r1,r2)            _rrotr(_jit,r0,r1,r2)
+static void _rroti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#    define rroti(r0,r1,i0)            _rroti(_jit,r0,r1,i0)
+#    define lroti(r0,r1,i0)            rroti(r0,r1,__WORDSIZE-(i0)) /* left-rotate by i0 == right-rotate by (word size - i0); i0 is parenthesised so expression arguments subtract as a whole */
+static void _andr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);       /* bitwise logic: *r variants take a register, *i variants a jit_word_t immediate as the last operand */
+#    define andr(r0,r1,r2)             _andr(_jit,r0,r1,r2)
+static void _andi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define andi(r0,r1,i0)             _andi(_jit,r0,r1,i0)
+static void _orr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define orr(r0,r1,r2)              _orr(_jit,r0,r1,r2)
+static void _ori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define ori(r0,r1,i0)              _ori(_jit,r0,r1,i0)
+static void _xorr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define xorr(r0,r1,r2)             _xorr(_jit,r0,r1,r2)
+static void _xori(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define xori(r0,r1,i0)             _xori(_jit,r0,r1,i0)
+#    define comr(r0,r1)                        NOT(r0,r1) /* complement and negate are each a single opcode macro */
+#    define negr(r0,r1)                        NEG(r0,r1)
+static void _clor(jit_state_t*, jit_int32_t, jit_int32_t);     /* bit-counting helpers below take (destination, source) register numbers as jit_int32_t */
+#    define clor(r0,r1)                        _clor(_jit,r0,r1)
+static void _clzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define clzr(r0,r1)                        _clzr(_jit,r0,r1)
+static void _ctor(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define ctor(r0,r1)                        _ctor(_jit,r0,r1)
+static void _ctzr(jit_state_t*, jit_int32_t, jit_int32_t);
+#    define ctzr(r0,r1)                        _ctzr(_jit,r0,r1)
+static void _rbitr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define rbitr(r0, r1)                        _rbitr(_jit, r0, r1)
+static void _popcntr(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define popcntr(r0, r1)              _popcntr(_jit, r0, r1)
+static void _gtr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);        /* comparisons: only gt/ge (signed and unsigned) have register-form routines */
+#    define gtr(r0,r1,r2)              _gtr(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ger(r0,r1,r2)              _ger(_jit,r0,r1,r2)
+static void _gtr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define gtr_u(r0,r1,r2)            _gtr_u(_jit,r0,r1,r2)
+static void _ger_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ger_u(r0,r1,r2)            _ger_u(_jit,r0,r1,r2)
+#    define ltr(r0,r1,r2)              gtr(r0,r2,r1) /* lt/le register forms reuse gt/ge with the two source operands swapped */
+#    define ltr_u(r0,r1,r2)            gtr_u(r0,r2,r1)
+#    define ler(r0,r1,r2)              ger(r0,r2,r1)
+#    define ler_u(r0,r1,r2)            ger_u(r0,r2,r1)
+static void _eqr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define eqr(r0,r1,r2)              _eqr(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ner(r0,r1,r2)              _ner(_jit,r0,r1,r2)
+static void _eqi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);  /* immediate forms: every comparison, including lt/le, has its own routine */
+#    define eqi(r0,r1,i0)              _eqi(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define nei(r0,r1,i0)              _nei(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define gti(r0,r1,i0)              _gti(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define gei(r0,r1,i0)              _gei(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define gti_u(r0,r1,i0)            _gti_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define gei_u(r0,r1,i0)            _gei_u(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define lti(r0,r1,i0)              _lti(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define lei(r0,r1,i0)              _lei(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define lti_u(r0,r1,i0)            _lti_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define lei_u(r0,r1,i0)            _lei_u(_jit,r0,r1,i0)
+static void _lshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define lshr(r0,r1,r2)             _lshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define rshr(r0,r1,r2)             _rshr(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define rshr_u(r0,r1,r2)           _rshr_u(_jit,r0,r1,r2)
+static void _lshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define lshi(r0,r1,i0)             _lshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define rshi(r0,r1,i0)             _rshi(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define rshi_u(r0,r1,i0)           _rshi_u(_jit,r0,r1,i0)
+#  define qlshr(r0,r1,r2,r3)           _qlshr(_jit,r0,r1,r2,r3)
+static void
+_qlshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qlshr_u(r0, r1, r2, r3)      _qlshr_u(_jit,r0,r1,r2,r3)
+static void
+_qlshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qlshi(r0, r1, r2, i0)                xlshi(1, r0, r1, r2, i0)
+#  define qlshi_u(r0, r1, r2, i0)      xlshi(0, r0, r1, r2, i0)
+#  define xlshi(s, r0, r1, r2, i0)     _xlshi(_jit, s, r0, r1, r2, i0)
+static void
+_xlshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#  define qrshr(r0, r1, r2, r3)                _qrshr(_jit,r0,r1,r2,r3)
+static void
+_qrshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qrshr_u(r0, r1, r2, r3)      _qrshr_u(_jit,r0,r1,r2,r3)
+static void
+_qrshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#  define qrshi(r0, r1, r2, i0)                xrshi(1, r0, r1, r2, i0)
+#  define qrshi_u(r0, r1, r2, i0)      xrshi(0, r0, r1, r2, i0)
+#  define xrshi(s, r0, r1, r2, i0)     _xrshi(_jit, s, r0, r1, r2, i0)
+static void
+_xrshi(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t,jit_word_t);
+#    define ldr_c(r0,r1)               LDB(r0,r1)
+#    define ldr_s(r0,r1)               LDW(r0,r1)
+#    define ldr_i(r0,r1)               LDL(r0,r1)
+static void _ldr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#    define ldr_uc(r0,r1)              _ldr_uc(_jit,r0,r1)
+static void _ldr_us(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#    define ldr_us(r0,r1)              _ldr_us(_jit,r0,r1)
+static void _ldi_c(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define ldi_c(r0,i0)               _ldi_c(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define ldi_s(r0,i0)               _ldi_s(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define ldi_i(r0,i0)               _ldi_i(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define ldi_uc(r0,i0)              _ldi_uc(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define ldi_us(r0,i0)              _ldi_us(_jit,r0,i0)
+static void _ldxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ldxr_c(r0,r1,r2)           _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ldxr_s(r0,r1,r2)           _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ldxr_i(r0,r1,r2)           _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ldxr_uc(r0,r1,r2)          _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define ldxr_us(r0,r1,r2)          _ldxr_us(_jit,r0,r1,r2)
+static void _ldxi_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define ldxi_c(r0,r1,i0)           _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define ldxi_s(r0,r1,i0)           _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define ldxi_i(r0,r1,i0)           _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define ldxi_uc(r0,r1,i0)          _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#    define ldxi_us(r0,r1,i0)          _ldxi_us(_jit,r0,r1,i0)
+#  define ldxbi_c(r0,r1,i0)            generic_ldxbi_c(r0,r1,i0)
+#  define ldxbi_uc(r0,r1,i0)           generic_ldxbi_uc(r0,r1,i0)
+#  define ldxbi_s(r0,r1,i0)            generic_ldxbi_s(r0,r1,i0)
+#  define ldxbi_us(r0,r1,i0)           generic_ldxbi_us(r0,r1,i0)
+#  define ldxbi_i(r0,r1,i0)            generic_ldxbi_i(r0,r1,i0)
+static void _ldxai_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxai_c(r0,r1,i0)            _ldxai_c(_jit,r0,r1,i0)
+static void _ldxai_uc(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxai_uc(r0,r1,i0)           _ldxai_uc(_jit,r0,r1,i0)
+static void _ldxai_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxai_s(r0,r1,i0)            _ldxai_s(_jit,r0,r1,i0)
+static void _ldxai_us(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxai_us(r0,r1,i0)           _ldxai_us(_jit,r0,r1,i0)
+static void _ldxai_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxai_i(r0,r1,i0)            _ldxai_i(_jit,r0,r1,i0)
+#  define unldr(r0, r1, i0)            fallback_unldr(r0, r1, i0)
+#  define unldi(r0, i0, i1)            fallback_unldi(r0, i0, i1)
+#  define unldr_u(r0, r1, i0)          fallback_unldr_u(r0, r1, i0)
+#  define unldi_u(r0, i0, i1)          fallback_unldi_u(r0, i0, i1)
+#    define str_c(r0,r1)               STB(r0,r1)
+#    define str_s(r0,r1)               STW(r0,r1)
+#    define str_i(r0,r1)               STL(r0,r1)
+static void _sti_c(jit_state_t*,jit_word_t,jit_uint16_t);
+#    define sti_c(i0,r0)               _sti_c(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_uint16_t);
+#    define sti_s(i0,r0)               _sti_s(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_uint16_t);
+#    define sti_i(i0,r0)               _sti_i(_jit,i0,r0)
+static void _stxr_c(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define stxr_c(r0,r1,r2)           _stxr_c(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define stxr_s(r0,r1,r2)           _stxr_s(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#    define stxr_i(r0,r1,r2)           _stxr_i(_jit,r0,r1,r2)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#    define stxi_c(i0,r0,r1)           _stxi_c(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#    define stxi_s(i0,r0,r1)           _stxi_s(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#    define stxi_i(i0,r0,r1)           _stxi_i(_jit,i0,r0,r1)
+static void _stxbi_c(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#  define stxbi_c(i0,r0,r1)            _stxbi_c(_jit,i0,r0,r1)
+static void _stxbi_s(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#  define stxbi_s(i0,r0,r1)            _stxbi_s(_jit,i0,r0,r1)
+static void _stxbi_i(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#  define stxbi_i(i0,r0,r1)            _stxbi_i(_jit,i0,r0,r1)
+#  define stxai_c(i0,r0,r1)            generic_stxai_c(i0,r0,r1)
+#  define stxai_s(i0,r0,r1)            generic_stxai_s(i0,r0,r1)
+#  define stxai_i(i0,r0,r1)            generic_stxai_i(i0,r0,r1)
+#  define unstr(r0, r1, i0)            fallback_unstr(r0, r1, i0)
+#  define unsti(i0, r0, i1)            fallback_unsti(i0, r0, i1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_uint16_t,jit_bool_t,jit_bool_t);
+#    define bltr(i0,r0,r1)             bltr_p(i0,r0,r1,0)
+#    define bler(i0,r0,r1)             bler_p(i0,r0,r1,0)
+#    define bgtr(i0,r0,r1)             bgtr_p(i0,r0,r1,0)
+#    define bger(i0,r0,r1)             bger_p(i0,r0,r1,0)
+#    define bltr_p(i0,r0,r1,p)         _bger(_jit,i0,r0,r1,0,p)
+#    define bler_p(i0,r0,r1,p)         _bger(_jit,i0,r1,r0,1,p)
+#    define bgtr_p(i0,r0,r1,p)         _bger(_jit,i0,r1,r0,0,p)
+#    define bger_p(i0,r0,r1,p)         _bger(_jit,i0,r0,r1,1,p)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_uint16_t,jit_bool_t,jit_bool_t);
+#    define bltr_u(i0,r0,r1)           bltr_u_p(i0,r0,r1,0)
+#    define bler_u(i0,r0,r1)           bler_u_p(i0,r0,r1,0)
+#    define bgtr_u(i0,r0,r1)           bgtr_u_p(i0,r0,r1,0)
+#    define bger_u(i0,r0,r1)           bger_u_p(i0,r0,r1,0)
+#    define bltr_u_p(i0,r0,r1,p)       _bger_u(_jit,i0,r0,r1,0,p)
+#    define bler_u_p(i0,r0,r1,p)       _bger_u(_jit,i0,r1,r0,1,p)
+#    define bgtr_u_p(i0,r0,r1,p)       _bger_u(_jit,i0,r1,r0,0,p)
+#    define bger_u_p(i0,r0,r1,p)       _bger_u(_jit,i0,r0,r1,1,p)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_uint16_t,jit_bool_t);
+#    define beqr(i0,r0,r1)             beqr_p(i0,r0,r1,0)
+#    define beqr_p(i0,r0,r1,p)         _beqr(_jit,i0,r0,r1,p)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_uint16_t,jit_bool_t);
+#    define bner(i0,r0,r1)             bner_p(i0,r0,r1,0)
+#    define bner_p(i0,r0,r1,p)         _bner(_jit,i0,r0,r1,p)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_uint16_t,jit_bool_t);
+#    define bmsr(i0,r0,r1)             bmsr_p(i0,r0,r1,0)
+#    define bmsr_p(i0,r0,r1,p)         _bmsr(_jit,i0,r0,r1,p)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_uint16_t,jit_bool_t);
+#    define bmcr(i0,r0,r1)             bmcr_p(i0,r0,r1,0)
+#    define bmcr_p(i0,r0,r1,p)         _bmcr(_jit,i0,r0,r1,p)
+static jit_word_t _boaddr(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_uint16_t,jit_bool_t,jit_bool_t);
+#    define boaddr(i0,r0,r1)           boaddr_p(i0,r0,r1,0)
+#    define bxaddr(i0,r0,r1)           bxaddr_p(i0,r0,r1,0)
+#    define boaddr_p(i0,r0,r1,p)       _boaddr(_jit,i0,r0,r1,1,p)
+#    define bxaddr_p(i0,r0,r1,p)       _boaddr(_jit,i0,r0,r1,0,p)
+static jit_word_t _boaddr_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                           jit_uint16_t,jit_bool_t,jit_bool_t);
+#    define boaddr_u(i0,r0,r1)         boaddr_u_p(i0,r0,r1,0)
+#    define bxaddr_u(i0,r0,r1)         bxaddr_u_p(i0,r0,r1,0)
+#    define boaddr_u_p(i0,r0,r1,p)     _boaddr_u(_jit,i0,r0,r1,1,p)
+#    define bxaddr_u_p(i0,r0,r1,p)     _boaddr_u(_jit,i0,r0,r1,0,p)
+static jit_word_t _bosubr(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_uint16_t,jit_bool_t,jit_bool_t);
+#    define bosubr(i0,r0,r1)           bosubr_p(i0,r0,r1,0)
+#    define bxsubr(i0,r0,r1)           bxsubr_p(i0,r0,r1,0)
+#    define bosubr_p(i0,r0,r1,p)       _bosubr(_jit,i0,r0,r1,1,p)
+#    define bxsubr_p(i0,r0,r1,p)       _bosubr(_jit,i0,r0,r1,0,p)
+static jit_word_t _bosubr_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                           jit_uint16_t,jit_bool_t,jit_bool_t);
+#    define bosubr_u(i0,r0,r1)         bosubr_u_p(i0,r0,r1,0)
+#    define bxsubr_u(i0,r0,r1)         bxsubr_u_p(i0,r0,r1,0)
+#    define bosubr_u_p(i0,r0,r1,p)     _bosubr_u(_jit,i0,r0,r1,1,p)
+#    define bxsubr_u_p(i0,r0,r1,p)     _bosubr_u(_jit,i0,r0,r1,0,p)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_word_t,jit_bool_t,jit_bool_t);
+#    define blei(i0,r0,i1)             blei_p(i0,r0,i1,0)
+#    define bgti(i0,r0,i1)             bgti_p(i0,r0,i1,0)
+#    define blei_p(i0,r0,i1,p)         _bgti(_jit,i0,r0,i1,0,p)
+#    define bgti_p(i0,r0,i1,p)         _bgti(_jit,i0,r0,i1,1,p)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_word_t,jit_bool_t,jit_bool_t);
+#    define blti(i0,r0,i1)             blti_p(i0,r0,i1,0)
+#    define bgei(i0,r0,i1)             bgei_p(i0,r0,i1,0)
+#    define blti_p(i0,r0,i1,p)         _bgei(_jit,i0,r0,i1,0,p)
+#    define bgei_p(i0,r0,i1,p)         _bgei(_jit,i0,r0,i1,1,p)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_word_t,jit_bool_t,jit_bool_t);
+#    define blei_u(i0,r0,i1)           blei_u_p(i0,r0,i1,0)
+#    define bgti_u(i0,r0,i1)           bgti_u_p(i0,r0,i1,0)
+#    define blei_u_p(i0,r0,i1,p)       _bgti_u(_jit,i0,r0,i1,0,p)
+#    define bgti_u_p(i0,r0,i1,p)       _bgti_u(_jit,i0,r0,i1,1,p)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_word_t,jit_bool_t,jit_bool_t);
+#    define blti_u(i0,r0,i1)           blti_u_p(i0,r0,i1,0)
+#    define bgei_u(i0,r0,i1)           bgei_u_p(i0,r0,i1,0)
+#    define blti_u_p(i0,r0,i1,p)       _bgei_u(_jit,i0,r0,i1,0,p)
+#    define bgei_u_p(i0,r0,i1,p)       _bgei_u(_jit,i0,r0,i1,1,p)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_word_t,jit_bool_t,jit_bool_t);
+#    define beqi(i0,r0,i1)             beqi_p(i0,r0,i1,0)
+#    define bnei(i0,r0,i1)             bnei_p(i0,r0,i1,0)
+#    define beqi_p(i0,r0,i1,p)         _beqi(_jit,i0,r0,i1,1,p)
+#    define bnei_p(i0,r0,i1,p)         _beqi(_jit,i0,r0,i1,0,p)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_uint16_t,
+                       jit_word_t,jit_bool_t,jit_bool_t);
+#    define bmsi(i0,r0,i1)             bmsi_p(i0,r0,i1,0)
+#    define bmci(i0,r0,i1)             bmci_p(i0,r0,i1,0)
+#    define bmsi_p(i0,r0,i1,p)         _bmsi(_jit,i0,r0,i1,0,p)
+#    define bmci_p(i0,r0,i1,p)         _bmsi(_jit,i0,r0,i1,1,p)
+static jit_word_t _boaddi(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_word_t,jit_bool_t,jit_bool_t);
+#    define boaddi(i0,r0,i1)           boaddi_p(i0,r0,i1,0)
+#    define bxaddi(i0,r0,i1)           bxaddi_p(i0,r0,i1,0)
+#    define boaddi_p(i0,r0,i1,p)       _boaddi(_jit,i0,r0,i1,1,p)
+#    define bxaddi_p(i0,r0,i1,p)       _boaddi(_jit,i0,r0,i1,0,p)
+static jit_word_t _boaddi_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                           jit_word_t,jit_bool_t,jit_bool_t);
+#    define boaddi_u(i0,r0,i1)         boaddi_u_p(i0,r0,i1,0)
+#    define bxaddi_u(i0,r0,i1)         bxaddi_u_p(i0,r0,i1,0)
+#    define boaddi_u_p(i0,r0,i1,p)     _boaddi_u(_jit,i0,r0,i1,1,p)
+#    define bxaddi_u_p(i0,r0,i1,p)     _boaddi_u(_jit,i0,r0,i1,0,p)
+static jit_word_t _bosubi(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_word_t,jit_bool_t,jit_bool_t);
+#    define bosubi(i0,r0,i1)           bosubi_p(i0,r0,i1,0)
+#    define bxsubi(i0,r0,i1)           bxsubi_p(i0,r0,i1,0)
+#    define bosubi_p(i0,r0,i1,p)       _bosubi(_jit,i0,r0,i1,1,p)
+#    define bxsubi_p(i0,r0,i1,p)       _bosubi(_jit,i0,r0,i1,0,p)
+static jit_word_t _bosubi_u(jit_state_t*,jit_word_t,jit_uint16_t,
+                           jit_word_t,jit_bool_t,jit_bool_t);
+#    define bosubi_u(i0,r0,i1)         bosubi_u_p(i0,r0,i1,0)
+#    define bxsubi_u(i0,r0,i1)         bxsubi_u_p(i0,r0,i1,0)
+#    define bosubi_u_p(i0,r0,i1,p)     _bosubi_u(_jit,i0,r0,i1,1,p)
+#    define bxsubi_u_p(i0,r0,i1,p)     _bosubi_u(_jit,i0,r0,i1,0,p)
+static void _jmpr(jit_state_t*,jit_int16_t);
+#  define jmpr(r0)                     _jmpr(_jit,r0)
+static jit_word_t _jmpi(jit_state_t*,jit_word_t,jit_bool_t);
+#  define jmpi(i0)                     _jmpi(_jit,i0,0)
+static void _callr(jit_state_t*,jit_int16_t);
+#  define callr(r0)                    _callr(_jit,r0)
+static void _calli(jit_state_t*,jit_word_t);
+#  define calli(i0)                    _calli(_jit,i0)
+
+static jit_word_t _movi_p(jit_state_t*,jit_uint16_t,jit_word_t);
+#    define movi_p(r0,i0)              _movi_p(_jit,r0,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#  define calli_p(i0)                  _calli_p(_jit,i0)
+static void _patch_abs(jit_state_t*,jit_word_t,jit_word_t);
+#    define patch_abs(instr,label)     _patch_abs(_jit,instr,label)
+static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
+#    define patch_at(jump,label)       _patch_at(_jit,jump,label)
+static void _prolog(jit_state_t*,jit_node_t*);
+#  define prolog(node)                 _prolog(_jit,node)
+static void _epilog(jit_state_t*,jit_node_t*);
+#  define epilog(node)                 _epilog(_jit,node)
+static void _vastart(jit_state_t*, jit_int32_t);
+#  define vastart(r0)                  _vastart(_jit, r0)
+static void _vaarg(jit_state_t*, jit_int32_t, jit_int32_t);
+#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
+
+/* The unsuffixed load/store names are aliases of the corresponding
+ * "_i" variants declared above. */
+#    define ldr(r0,r1)                 ldr_i(r0,r1)
+#    define ldi(r0,i0)                 ldi_i(r0,i0)
+#    define ldxr(r0,r1,r2)             ldxr_i(r0,r1,r2)
+#    define ldxi(r0,r1,i0)             ldxi_i(r0,r1,i0)
+#    define str(r0,r1)                 str_i(r0,r1)
+#    define sti(i0,r0)                 sti_i(i0,r0)
+#    define stxr(r0,r1,r2)             stxr_i(r0,r1,r2)
+#    define stxi(i0,r0,r1)             stxi_i(i0,r0,r1)
+
+/*
+ * Bit-mask classification helpers used when materialising immediates.
+ *
+ *   is_low_mask(im):    bit 0 is set and im + 1 has at most one bit set.
+ *   is_middle_mask(im): im is non-zero and adding its lowest set bit
+ *                       leaves at most one bit set.
+ *   is_high_mask(im):   im is non-zero and adding its lowest set bit
+ *                       wraps around to zero.
+ *   masked_bits_count(im) / unmasked_bits_count(im): number of set /
+ *                       clear bits in a word.
+ *
+ * The additions and the shift are carried out on unsigned long: the
+ * lowest set bit may be the top bit of the word, and "1 << 31" or
+ * "LONG_MAX + 1" evaluated in a signed type would overflow.
+ */
+#  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((unsigned long)(im) + 1UL) <= 1) : 0)
+#  define is_middle_mask(im)           ((im) ? (__builtin_popcountl((unsigned long)(im) + (1UL << __builtin_ctzl(im))) <= 1) : 0)
+#  define is_high_mask(im)             ((im) ? (__builtin_popcountl((unsigned long)(im) + (1UL << __builtin_ctzl(im))) == 0) : 0)
+#  define masked_bits_count(im)                __builtin_popcountl(im)
+#  define unmasked_bits_count(im)      (__WORDSIZE - masked_bits_count(im))
+
+/* jit_sh34_p() evaluates to 1 when any of the SH3/SH4 compiler
+ * predefines tested below is defined, and to 0 otherwise. */
+#  if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+#    define jit_sh34_p()       1
+#  else
+#    define jit_sh34_p()       0
+#  endif
+
+static void _maybe_emit_frchg(jit_state_t *_jit);
+#  define maybe_emit_frchg() _maybe_emit_frchg(_jit)
+static void _maybe_emit_fschg(jit_state_t *_jit);
+#  define maybe_emit_fschg() _maybe_emit_fschg(_jit)
+#endif /* PROTO */
+
+#if CODE
+/* Emit one instruction word assembled from the "ni" field layout:
+ * opcode bits c, register field n and immediate field i. */
+static void
+_cni(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n, jit_uint16_t i)
+{
+       jit_instr_t insn = { .ni = { .c = c, .n = n, .i = i } };
+
+       ii(insn.op);
+}
+
+/* Emit one instruction word assembled from the "nmd" field layout:
+ * opcode bits c, fields n and m, and field d. */
+static void
+_cnmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t n,
+      jit_uint16_t m, jit_uint16_t d)
+{
+       jit_instr_t insn = { .nmd = { .c = c, .n = n, .m = m, .d = d } };
+
+       ii(insn.op);
+}
+
+/* Emit one instruction word assembled from the "md" field layout:
+ * opcode bits c, field m and field d. */
+static void
+_cmd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t m, jit_uint16_t d)
+{
+       jit_instr_t insn = { .md = { .c = c, .m = m, .d = d } };
+
+       ii(insn.op);
+}
+
+/* Emit one instruction word assembled from the "d" field layout:
+ * opcode bits c and field d. */
+static void
+_cd(jit_state_t *_jit, jit_uint16_t c, jit_uint16_t d)
+{
+       jit_instr_t insn = { .d = { .c = c, .d = d } };
+
+       ii(insn.op);
+}
+
+/* Emit NOPs covering i0 bytes, one NOP per two bytes.  i0 is expected to
+ * be even; an odd count trips the assertion. */
+static void
+_nop(jit_state_t *_jit, jit_word_t i0)
+{
+       jit_word_t left = i0;
+
+       while (left > 0) {
+               NOP();
+               left -= 2;
+       }
+
+       assert(left == 0);
+}
+
+/* Copy r1 into r0.  Nothing is emitted when both are the same register;
+ * transfers from or to _GBR use STCGBR / LDCGBR instead of MOV. */
+static void
+_movr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (r0 == r1)
+               return;
+
+       if (r1 == _GBR) {
+               STCGBR(r0);
+               return;
+       }
+
+       if (r0 == _GBR) {
+               LDCGBR(r1);
+               return;
+       }
+
+       MOV(r0, r1);
+}
+
+/*
+ * Recursively build the constant i0 in r0 out of 8-bit pieces.
+ *
+ * Values in [-128, 128) are loaded with a single MOVI.  Otherwise the
+ * upper part is built first (by recursion), shifted left with SHLL8 --
+ * or SHLL16 when the low byte of the 8-bit-shifted upper part is zero --
+ * and the low byte of i0 is then added with ADDI when it is non-zero.
+ *
+ * The "!!(i0 & 0x80)" term adds one to the upper part when bit 7 of i0 is
+ * set -- presumably to compensate for ADDI sign-extending its 8-bit
+ * immediate; confirm against the ADDI encoding.
+ *
+ * movi_loop_cnt() mirrors this control flow to count the instructions
+ * emitted; keep the two in sync.
+ */
+static void
+movi_loop(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       jit_word_t tmp;
+
+       if (i0 >= -128 && i0 < 128) {
+               MOVI(r0, i0);
+       } else {
+               tmp = (i0 >> 8) + !!(i0 & 0x80);
+               if (tmp & 0xff) {
+                       movi_loop(_jit, r0, tmp);
+                       if (tmp != 0)
+                               SHLL8(r0);
+               } else {
+                       /* Next byte up is zero: skip it with a 16-bit shift. */
+                       tmp = (i0 >> 16) + !!(i0 & 0x80);
+                       movi_loop(_jit, r0, tmp);
+                       if (tmp != 0)
+                               SHLL16(r0);
+               }
+               if (i0 & 0xff)
+                       ADDI(r0, i0 & 0xff);
+       }
+}
+
+/* Return the number of instructions movi_loop() emits for i0.  The
+ * decomposition below follows movi_loop() step by step: one load for a
+ * value in [-128, 128); otherwise the count for the upper part, plus one
+ * for the shift when the upper part is non-zero, plus one for the final
+ * add when the low byte is non-zero. */
+static jit_word_t
+movi_loop_cnt(jit_word_t i0)
+{
+       jit_word_t upper;
+
+       if (i0 >= -128 && i0 < 128)
+               return 1;
+
+       upper = (i0 >> 8) + !!(i0 & 0x80);
+       if (!(upper & 0xff))
+               upper = (i0 >> 16) + !!(i0 & 0x80);
+
+       return !!upper + movi_loop_cnt(upper) + !!(i0 & 0xff);
+}
+
+/*
+ * Load the immediate i0 into r0, trying progressively longer encodings:
+ *   1. a single MOVI for values in [-128, 128);
+ *   2. MOVI + SHLL for even values in [-256, 256);
+ *   3. MOVI + SHLL2 for multiples of four in [-512, 512);
+ *   4. MOVA when i0 is a 4-byte-aligned address at most 0x3ff bytes past
+ *      the current pc rounded down to a multiple of four;
+ *   5. MOVI -1 followed by a logical right / left shift when i0 is a low
+ *      or high bit mask;
+ *   6. the movi_loop() sequence when it needs fewer than 4 instructions;
+ *   7. load_const() otherwise.
+ */
+static void
+_movi(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       /* Current pc rounded down to a 4-byte boundary (MOVA base). */
+       jit_word_t w = _jit->pc.w & ~3;
+
+       if (i0 >= -128 && i0 < 128) {
+               MOVI(r0, i0);
+       } else if (!(i0 & 0x1) && i0 >= -256 && i0 < 256) {
+               MOVI(r0, i0 >> 1);
+               SHLL(r0);
+       } else if (!(i0 & 0x3) && i0 >= -512 && i0 < 512) {
+               MOVI(r0, i0 >> 2);
+               SHLL2(r0);
+       } else if (i0 >= w && i0 <= w + 0x3ff && !((i0 - w) & 0x3)) {
+               /* The result is taken from _R0 afterwards, so this path
+                * overwrites _R0 even when r0 is another register. */
+               MOVA((i0 - w) >> 2);
+               movr(r0, _R0);
+       } else if (is_low_mask(i0)) {
+               MOVI(r0, -1);
+               rshi_u(r0, r0, unmasked_bits_count(i0));
+       } else if (is_high_mask(i0)) {
+               MOVI(r0, -1);
+               lshi(r0, r0, unmasked_bits_count(i0));
+       } else if (movi_loop_cnt(i0) < 4) {
+               movi_loop(_jit, r0, i0);
+       } else {
+               load_const(0, r0, i0);
+       }
+}
+
+/*
+ * Emit a conditional branch on the T flag.
+ *
+ *   i0              branch target, or 0 when it is not known yet
+ *   w               address the displacement is computed from
+ *   t_set           non-zero: branch when T is set (BT); zero: branch when
+ *                   T is clear (BF)
+ *   force_patchable non-zero: always emit the long form, loading the
+ *                   target with movi_p()
+ *
+ * Three shapes are produced:
+ *   - unknown target: BT/BF with a zero displacement, followed by two NOPs
+ *     when more than 254 bytes remain in the code buffer;
+ *   - displacement >= -128: a single BT/BF;
+ *   - otherwise: the target is loaded in a scratch register and reached
+ *     with JMP + NOP, guarded by a BT/BF of the opposite condition with a
+ *     zero displacement.
+ */
+static void
+emit_branch_opcode(jit_state_t *_jit, jit_word_t i0, jit_word_t w,
+                  int t_set, int force_patchable)
+{
+       /* '-' binds tighter than '>>': this is ((i0 - w) >> 1) - 2. */
+       jit_int32_t disp = (i0 - w >> 1) - 2;
+       jit_uint16_t reg;
+
+       if (!force_patchable && i0 == 0) {
+               /* Positive displacement - we don't know the target yet. */
+               if (t_set)
+                       BT(0);
+               else
+                       BF(0);
+
+               /* Leave space after the BF/BT in case we need to add a
+                * BRA opcode. */
+               /* w is reused here as the number of bytes left in the
+                * code buffer. */
+               w = _jit->code.length - (_jit->pc.uc - _jit->code.ptr);
+               if (w > 254) {
+                       NOP();
+                       NOP();
+               }
+       } else if (!force_patchable && disp >= -128) {
+               /* NOTE(review): only the lower bound is checked; callers
+                * presumably pass backward targets here -- confirm. */
+               if (t_set)
+                       BT(disp);
+               else
+                       BF(disp);
+       } else {
+               reg = jit_get_reg(jit_class_gpr);
+
+               if (force_patchable)
+                       movi_p(rn(reg), i0);
+               else
+                       movi(rn(reg), i0);
+               /* Opposite condition: the JMP below is the taken path. */
+               if (t_set)
+                       BF(0);
+               else
+                       BT(0);
+               JMP(rn(reg));
+               NOP();
+
+               jit_unget_reg(reg);
+       }
+}
+
+/* Emit FRCHG, unless _jitc->no_flag is set and the instruction word just
+ * before pc is 0xfbfd: in that case that previous word is dropped by
+ * stepping pc back instead. */
+static void _maybe_emit_frchg(jit_state_t *_jit)
+{
+       const jit_instr_t *prev = (const jit_instr_t *)(_jit->pc.w - 2);
+
+       if (!_jitc->no_flag || prev->op != 0xfbfd) {
+               FRCHG();
+               return;
+       }
+
+       _jit->pc.us--;
+}
+
+/* Emit FSCHG, unless _jitc->no_flag is set and the instruction word just
+ * before pc is 0xf3fd: in that case that previous word is dropped by
+ * stepping pc back instead. */
+static void _maybe_emit_fschg(jit_state_t *_jit)
+{
+       const jit_instr_t *prev = (const jit_instr_t *)(_jit->pc.w - 2);
+
+       if (!_jitc->no_flag || prev->op != 0xf3fd) {
+               FSCHG();
+               return;
+       }
+
+       _jit->pc.us--;
+}
+
+/* Make the T flag reflect r0 for a following conditional branch.
+ * Normally this emits TST(r0, r0).  When _jitc->no_flag is set and the
+ * previous opcode is a MOVT(r0), the TST is skipped and *set is toggled
+ * instead, so that the caller inverts its branch condition. */
+static void maybe_emit_tst(jit_state_t *_jit, jit_uint16_t r0, jit_bool_t *set)
+{
+       const jit_instr_t *prev = (const jit_instr_t *)(_jit->pc.w - 2);
+       const int movt_r0 = 0x29 | (r0 << 8);
+
+       if (!_jitc->no_flag || prev->op != movt_r0) {
+               TST(r0, r0);
+               return;
+       }
+
+       *set ^= 1;
+}
+
+/*
+ * Conditional move: copy r1 into r0 depending on whether r2 is zero.
+ * The test on r2 sets T, and a branch on T (polarity given by "set",
+ * possibly inverted by maybe_emit_tst()) jumps over the move.
+ *
+ * The branch is emitted with a fixed displacement that skips exactly one
+ * instruction.  movr() emits nothing when r0 == r1, in which case the
+ * branch would skip whatever instruction happens to be generated next;
+ * since moving a register onto itself is a no-op whatever the condition,
+ * nothing at all is emitted for that case.
+ */
+static void _movnr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                  jit_uint16_t r2, jit_bool_t set)
+{
+       if (r0 == r1)
+               return;
+
+       maybe_emit_tst(_jit, r2, &set);
+
+       /* Target 4 relative to base 0: skip the single move instruction. */
+       emit_branch_opcode(_jit, 4, 0, set, 0);
+       movr(r0, r1);
+}
+
+/* Byte used by _casx() as the target of its TAS spin loop. */
+static char atomic_byte;
+
+/*
+ * Compare-and-swap on the word at address r1 (or at the immediate
+ * address i0 when r1 is _NOREG): the word is loaded into r0 and compared
+ * with r2, the comparison result is left in r0 via MOVT, and r3 is stored
+ * to the address when the BF(0) after the comparison is not taken.
+ *
+ * The sequence is bracketed by a TAS on atomic_byte, retried until the
+ * BF(-3) falls through, and a final byte store of 0 to atomic_byte --
+ * i.e. it appears to use atomic_byte as a process-wide lock.  _R0 is
+ * overwritten by the final MOVI.
+ *
+ * NOTE(review): r0 is overwritten by the load before being compared with
+ * r2, so this presumably requires r0 != r2 -- confirm with the callers.
+ */
+static void
+_casx(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+      jit_int32_t r2, jit_int32_t r3, jit_word_t i0)
+{
+    jit_int32_t                r1_reg, iscasi, addr_reg;
+
+    /* Immediate-address form: materialise i0 in a scratch register. */
+    if ((iscasi = (r1 == _NOREG))) {
+       r1_reg = jit_get_reg(jit_class_gpr);
+       r1 = rn(r1_reg);
+       movi(r1, i0);
+    }
+
+    addr_reg = jit_get_reg(jit_class_gpr);
+    movi(rn(addr_reg), (uintptr_t)&atomic_byte);
+
+    /* Spin: BF(-3) branches back to the TAS just above it. */
+    TAS(rn(addr_reg));
+    BF(-3);
+
+    LDL(r0, r1);
+    CMPEQ(r0, r2);
+    MOVT(r0);
+
+    /* Skip the store when the comparison left T clear. */
+    BF(0);
+    STL(r1, r3);
+
+    /* Write 0 back to atomic_byte. */
+    MOVI(_R0, 0);
+    STB(rn(addr_reg), _R0);
+
+    jit_unget_reg(addr_reg);
+    if (iscasi)
+       jit_unget_reg(r1_reg);
+}
+
+/* r0 = r1 + r2.  ADD works in place on its first operand, so when the
+ * destination already holds r2 the other operand is added to it directly;
+ * otherwise r1 is copied into r0 first. */
+static void
+_addr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r0 != r2) {
+               movr(r0, r1);
+               ADD(r0, r2);
+               return;
+       }
+
+       ADD(r0, r1);
+}
+
+/* Add r1 and r2 into r0 through the add-with-carry path (addxr), after
+ * emitting CLRT -- presumably so that the carry-in (T) starts at zero. */
+static void
+_addcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CLRT();
+       addxr(r0, r1, r2);
+}
+
+/* r0 = r1 + r2 using ADDC.  ADDC works in place on its first operand, so
+ * when the destination already holds r2 the other operand is added to it
+ * directly; otherwise r1 is copied into r0 first. */
+static void
+_addxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r0 != r2) {
+               movr(r0, r1);
+               ADDC(r0, r2);
+               return;
+       }
+
+       ADDC(r0, r1);
+}
+
+/*
+ * r0 = r1 + i0.
+ *
+ * Immediates that fit the signed 8-bit range [-128, 127] are added with a
+ * single ADDI after copying r1 into r0 (the same range _movi() and
+ * _rsbi() accept for 8-bit immediates).  Larger immediates are first
+ * loaded into a register: into r0 itself when it is distinct from r1,
+ * otherwise into _R0, which therefore must not be r1.
+ */
+static void
+_addi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (i0 >= -128 && i0 < 128) {
+               movr(r0, r1);
+               ADDI(r0, i0);
+       } else if (r0 != r1) {
+               movi(r0, i0);
+               addr(r0, r1, r0);
+       } else {
+               /* _R0 is used as the temporary for the immediate. */
+               assert(r1 != _R0);
+
+               movi(_R0, i0);
+               addr(r0, r1, _R0);
+       }
+}
+
+/*
+ * r0 = r1 + i0, starting from a cleared carry and going through the
+ * add-with-carry path.
+ *
+ * The immediate is loaded into _R0 *before* T is cleared: movi() may
+ * expand to a sequence containing SHLL (see _movi()), which writes the T
+ * bit, so clearing T first could leave a stale carry-in for the ADDC.
+ * This is the same ordering _subci() uses.  _R0 is the temporary, so
+ * neither r0 nor r1 may be _R0.
+ */
+static void
+_addci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       movi(_R0, i0);
+       CLRT();
+       addxr(r0, r1, _R0);
+}
+
+/* r0 = r1 + i0 through addxr (ADDC).  The immediate is loaded into _R0,
+ * so neither r0 nor r1 may be _R0.
+ *
+ * NOTE(review): movi() can expand to shift instructions (e.g. SHLL in
+ * _movi()); if those modify T, the incoming carry would be lost before
+ * the ADDC -- confirm. */
+static void
+_addxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       movi(_R0, i0);
+       addxr(r0, r1, _R0);
+}
+
+/* r0 = r1 - r2.
+ *   - identical operands: the result is the constant 0;
+ *   - destination aliases the subtrahend: negate it in place, then add r1;
+ *   - otherwise: copy r1 into r0 and subtract r2. */
+static void
+_subr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r1 == r2) {
+               movi(r0, 0);
+               return;
+       }
+
+       if (r0 == r2) {
+               NEG(r0, r2);
+               ADD(r0, r1);
+               return;
+       }
+
+       movr(r0, r1);
+       SUB(r0, r2);
+}
+
+/* Subtract r2 from r1 into r0 through the subtract-with-borrow path
+ * (subxr), after emitting CLRT -- presumably so that the borrow-in (T)
+ * starts at zero. */
+static void
+_subcr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CLRT();
+       subxr(r0, r1, r2);
+}
+
+/* r0 = r1 - r2 using SUBC.  SUBC works in place on its first operand;
+ * when the destination aliases the subtrahend, r2 is saved in a scratch
+ * register before r0 is overwritten with r1. */
+static void
+_subxr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint32_t saved;
+
+       if (r0 == r2) {
+               saved = jit_get_reg(jit_class_gpr);
+
+               movr(rn(saved), r0);
+               movr(r0, r1);
+               SUBC(r0, rn(saved));
+
+               jit_unget_reg(saved);
+               return;
+       }
+
+       movr(r0, r1);
+       SUBC(r0, r2);
+}
+
+/* r0 = r1 - i0, implemented as an addition of the negated immediate. */
+static void
+_subi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       addi(r0, r1, -i0);
+}
+
+/* r0 = r1 - i0 through subcr.  The immediate is loaded into _R0 first,
+ * so neither r0 nor r1 may be _R0; subcr() then emits CLRT followed by
+ * the subtract-with-borrow sequence. */
+static void
+_subci(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       movi(_R0, i0);
+       subcr(r0, r1, _R0);
+}
+
+/* r0 = r1 - i0 through subxr (SUBC).  The immediate is loaded into _R0,
+ * so neither r0 nor r1 may be _R0.
+ *
+ * NOTE(review): movi() can expand to shift instructions (e.g. SHLL in
+ * _movi()); if those modify T, the incoming borrow would be lost before
+ * the SUBC -- confirm. */
+static void
+_subxi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       movi(_R0, i0);
+       subxr(r0, r1, _R0);
+}
+
+/* Reverse subtract: r0 = i0 - r1.
+ *   - i0 in [-128, 127]: negate r1 into r0 and add the immediate;
+ *   - r0 distinct from r1: load i0 into r0, then subtract r1;
+ *   - r0 aliases r1: load i0 into _R0 and subtract from there.
+ * _R0 may be clobbered, hence the assertions on the register operands. */
+static void
+_rsbi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (i0 >= -128 && i0 <= 127) {
+               negr(r0, r1);
+               ADDI(r0, i0);
+               return;
+       }
+
+       if (r0 != r1) {
+               assert(r0 != _R0 && r1 != _R0);
+
+               movi(r0, i0);
+               subr(r0, r0, r1);
+               return;
+       }
+
+       assert(r0 != _R0);
+
+       movi(_R0, i0);
+       subr(r0, _R0, r1);
+}
+
+/* r0 = r1 * r2: MULL on the two operands, then STSL into r0
+ * (presumably the low word of the product -- confirm against the
+ * MULL/STSL macro definitions). */
+static void
+_mulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       MULL(r1, r2);
+       STSL(r0);
+}
+
+/* High part of r1 * r2 into r0: DMULS on the two operands, then STSH
+ * into r0 (presumably the signed 64-bit product's upper word --
+ * confirm against the DMULS/STSH macro definitions). */
+static void
+_hmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+       DMULS(r1, r2);
+       STSH(r0);
+}
+
+/* Immediate form of hmulr: i0 is loaded into _R0, which is then used as
+ * the second operand.
+ * NOTE(review): unlike _muli(), there is no assertion that r1 is not
+ * _R0 -- confirm callers never pass it. */
+static void
+_hmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movi(_R0, i0);
+    hmulr(r0, r1, _R0);
+}
+
+/* Unsigned counterpart of _hmulr: DMULU on the two operands, then STSH
+ * into r0. */
+static void
+_hmulr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+       DMULU(r1, r2);
+       STSH(r0);
+}
+
+/* Immediate form of hmulr_u: i0 is loaded into _R0, which is then used
+ * as the second operand.
+ * NOTE(review): unlike _muli(), there is no assertion that r1 is not
+ * _R0 -- confirm callers never pass it. */
+static void
+_hmuli_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    movi(_R0, i0);
+    hmulr_u(r0, r1, _R0);
+}
+
+/* Full product of r2 * r3 via DMULS: STSL into r0 and STSH into r1
+ * (presumably the low and high words respectively -- confirm against
+ * the macro definitions). */
+static void
+_qmulr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+       jit_uint16_t r2, jit_uint16_t r3)
+{
+       DMULS(r2, r3);
+       STSL(r0);
+       STSH(r1);
+}
+
+/* Unsigned counterpart of _qmulr: DMULU on r2 and r3, STSL into r0 and
+ * STSH into r1. */
+static void
+_qmulr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+        jit_uint16_t r2, jit_uint16_t r3)
+{
+       DMULU(r2, r3);
+       STSL(r0);
+       STSH(r1);
+}
+
+/* r0 = r1 * i0.  The immediate is loaded into _R0 and multiplied with
+ * mulr(); r1 therefore must not be _R0. */
+static void
+_muli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       movi(_R0, i0);
+       mulr(r0, r1, _R0);
+}
+
+/* Immediate form of qmulr: i0 is loaded into _R0 and used as the second
+ * operand; r2 therefore must not be _R0. */
+static void
+_qmuli(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+       jit_uint16_t r2, jit_word_t i0)
+{
+       assert(r2 != _R0);
+
+       movi(_R0, i0);
+       qmulr(r0, r1, r2, _R0);
+}
+
+/* Immediate form of qmulr_u: i0 is loaded into _R0 and used as the
+ * second operand; r2 therefore must not be _R0. */
+static void
+_qmuli_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+        jit_uint16_t r2, jit_word_t i0)
+{
+       assert(r2 != _R0);
+
+       movi(_R0, i0);
+       qmulr_u(r0, r1, r2, _R0);
+}
+
+/*
+ * Signed division: r0 = r1 / r2, built from DIV0S / DIV1 steps.
+ *
+ * _R0 is used as scratch (loop counter and constants), so neither source
+ * operand may be _R0.  When both operands are the same register the
+ * result is the constant 1.  When r0 aliases the divisor, the divisor is
+ * first copied to a second scratch register because r0 is overwritten
+ * with the dividend.
+ *
+ * The instruction order below is significant (flag dependencies and
+ * fixed branch displacement); do not reorder.
+ */
+static void
+_divr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint32_t reg, reg2;
+       jit_uint16_t divisor;
+
+       assert(r1 != _R0 && r2 != _R0);
+
+       if (r1 == r2) {
+               MOVI(r0, 1);
+       } else {
+               reg = jit_get_reg(jit_class_gpr);
+
+               if (r0 == r2) {
+                       reg2 = jit_get_reg(jit_class_gpr);
+                       movr(rn(reg2), r2);
+                       divisor = rn(reg2);
+               } else {
+                       divisor = r2;
+               }
+
+               movr(r0, r1);
+               MOVI(_R0, 0);
+
+               /* Presumably: T = dividend negative, reg = sign extension
+                * of the dividend, and the dividend is adjusted by T. */
+               CMPGT(_R0, r0);
+               SUBC(rn(reg), rn(reg));
+               SUBC(r0, _R0);
+
+               /* _R0 doubles as the iteration counter of the loop below. */
+               MOVI(_R0, -2);
+               DIV0S(rn(reg), divisor);
+
+               /* Division loop: BTS(-6) appears to branch back to the
+                * ROTCL(r0) above it, with TSTI(1) in its delay slot. */
+               ROTCL(r0);
+               DIV1(rn(reg), divisor);
+               ROTCL(_R0);
+               XORI(1);
+               BTS(-6);
+               TSTI(1);
+
+               /* Final quotient bit and correction. */
+               ROTCL(r0);
+               MOVI(_R0, 0);
+               ADDC(r0, _R0);
+
+               jit_unget_reg(reg);
+               if (r0 == r2)
+                       jit_unget_reg(reg2);
+       }
+}
+
+/*
+ * Unsigned register divide, result in r0 (dividend r1, divisor r2).
+ *
+ * Same structure as _divr() without the sign handling: the scratch
+ * register is zeroed, DIV0U() is emitted, and the same DIV1 loop follows.
+ * Neither input may be _R0, which the sequence uses as a scratch register.
+ */
+static void
+_divr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint32_t reg, reg2;
+       jit_uint16_t divisor;
+
+       assert(r1 != _R0 && r2 != _R0);
+
+       if (r1 == r2) {
+               /* Same register on both sides: emit the constant 1. */
+               MOVI(r0, 1);
+       } else {
+               reg = jit_get_reg(jit_class_gpr);
+
+               /* Preserve the divisor when r0 (the dividend copy) aliases it. */
+               if (r0 == r2) {
+                       reg2 = jit_get_reg(jit_class_gpr);
+                       movr(rn(reg2), r2);
+                       divisor = rn(reg2);
+               } else {
+                       divisor = r2;
+               }
+
+               movr(r0, r1);
+               MOVI(rn(reg), 0);
+               MOVI(_R0, -2);
+               DIV0U();
+
+               /*
+                * Division step loop; BTS(-6) presumably branches back to
+                * the first ROTCL with TSTI(1) in its delay slot -- confirm.
+                */
+               ROTCL(r0);
+               DIV1(rn(reg), divisor);
+               ROTCL(_R0);
+               XORI(1);
+               BTS(-6);
+               TSTI(1);
+
+               ROTCL(r0);
+
+               jit_unget_reg(reg);
+               if (r0 == r2)
+                       jit_unget_reg(reg2);
+       }
+}
+
+/*
+ * Signed divide with remainder: quotient to r0, remainder to r1,
+ * dividend r2, divisor r3.
+ *
+ * The remainder is computed as r2 - quotient * r3, with the product
+ * staged in _R0.  When r0 aliases either input, the quotient is built in
+ * a scratch register and copied into r0 last so both inputs are still
+ * intact for the remainder computation.
+ */
+static void
+_qdivr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+       jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint32_t reg;
+
+       assert(r2 != _R0 && r3 != _R0);
+
+       if (r0 != r2 && r0 != r3) {
+               divr(r0, r2, r3);
+               mulr(_R0, r0, r3);
+               subr(r1, r2, _R0);
+       } else {
+               reg = jit_get_reg(jit_class_gpr);
+
+               divr(rn(reg), r2, r3);
+               mulr(_R0, rn(reg), r3);
+               subr(r1, r2, _R0);
+               movr(r0, rn(reg));
+
+               jit_unget_reg(reg);
+       }
+}
+
+/*
+ * Unsigned divide with remainder: quotient to r0, remainder to r1,
+ * dividend r2, divisor r3.
+ *
+ * Mirrors _qdivr() but uses divr_u for the quotient.  The remainder is
+ * r2 - quotient * r3, with the product staged in _R0; a scratch register
+ * holds the quotient when r0 aliases an input.
+ */
+static void
+_qdivr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+        jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint32_t reg;
+
+       assert(r2 != _R0 && r3 != _R0);
+
+       if (r0 != r2 && r0 != r3) {
+               divr_u(r0, r2, r3);
+               mulr(_R0, r0, r3);
+               subr(r1, r2, _R0);
+       } else {
+               reg = jit_get_reg(jit_class_gpr);
+
+               divr_u(rn(reg), r2, r3);
+               mulr(_R0, rn(reg), r3);
+               subr(r1, r2, _R0);
+               movr(r0, rn(reg));
+
+               jit_unget_reg(reg);
+       }
+}
+
+/*
+ * Signed divide by an immediate: materialises i0 in a scratch register
+ * (not _R0, which divr rejects as an input) and defers to divr.
+ */
+static void
+_divi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+       movi(rn(reg), i0);
+       divr(r0, r1, rn(reg));
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * Signed divide-with-remainder by an immediate: materialises i0 in a
+ * scratch register and defers to qdivr(r0, r1, r2, scratch).
+ */
+static void
+_qdivi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+       jit_uint16_t r2, jit_word_t i0)
+{
+       jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+       movi(rn(reg), i0);
+       qdivr(r0, r1, r2, rn(reg));
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * Unsigned divide of r2 by the constant i0: quotient to r0, remainder
+ * to r1.  The remainder is derived as r2 - quotient * i0.
+ *
+ * Fast path (neither output aliases r2): the quotient is produced
+ * directly in r0 and the product is staged in r1.
+ *
+ * Aliased path: the quotient is produced in a scratch register so that
+ * r2 stays intact until the subtraction, then copied into r0 last, the
+ * same way _qdivr() and _qdivr_u() do.  Without that final move the
+ * quotient would never reach r0.
+ */
+static void
+_qdivi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+        jit_uint16_t r2, jit_word_t i0)
+{
+       if (r0 != r2 && r1 != r2) {
+               fallback_divi_u(r0, r2, i0);
+               muli(r1, r0, i0);
+               subr(r1, r2, r1);
+       } else {
+               jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+               fallback_divi_u(rn(reg), r2, i0);
+               muli(_R0, rn(reg), i0);
+               subr(r1, r2, _R0);
+               /* Deliver the quotient only after r2 has been consumed. */
+               movr(r0, rn(reg));
+
+               jit_unget_reg(reg);
+       }
+}
+
+/*
+ * Signed remainder: calls qdivr with a scratch register as the quotient
+ * destination and r0 as the remainder destination; the quotient is
+ * discarded.
+ */
+static void
+_remr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+       assert(r1 != _R0 && r2 != _R0);
+
+       qdivr(rn(reg), r0, r1, r2);
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * Unsigned remainder: calls qdivr_u with a scratch register as the
+ * quotient destination and r0 as the remainder destination; the quotient
+ * is discarded.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+       assert(r1 != _R0 && r2 != _R0);
+
+       qdivr_u(rn(reg), r0, r1, r2);
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * Signed remainder by an immediate: materialises i0 in a scratch register
+ * and defers to remr(r0, r1, scratch).
+ */
+static void
+_remi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+       movi(rn(reg), i0);
+       remr(r0, r1, rn(reg));
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * Unsigned remainder by an immediate: calls qdivi_u with a scratch
+ * register as the quotient destination and r0 as the remainder
+ * destination; the quotient is discarded.
+ */
+static void
+_remi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       jit_uint32_t reg = jit_get_reg(jit_class_gpr);
+
+       qdivi_u(rn(reg), r0, r1, i0);
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * 16-bit byte swap: emits EXTUW(r0, r1) then SWAPB(r0, r0).
+ * Presumably zero-extends the low half-word of r1 into r0 and swaps its
+ * two bytes -- confirm against the opcode macros.
+ */
+static void
+_bswapr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       EXTUW(r0, r1);
+       SWAPB(r0, r0);
+}
+
+/*
+ * 32-bit byte swap built from three emitted opcodes:
+ * SWAPB(r0, r1), SWAPW(r0, r0), SWAPB(r0, r0).
+ */
+static void
+_bswapr_ui(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       SWAPB(r0, r1);
+       SWAPW(r0, r0);
+       SWAPB(r0, r0);
+}
+
+/*
+ * Rotate-left by a register count: value r1, count r2, result in r0.
+ *
+ * The count is copied into _R0 (used as the loop counter) and the value
+ * into r0, so neither r0 nor r1 may be _R0.
+ */
+static void
+_lrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       movr(_R0, r2);
+       movr(r0, r1);
+
+       /*
+        * ROTL / TST / BFS loop with the counter decrement after the branch
+        * (presumably in its delay slot).  The loop appears to rotate one
+        * step more than requested, which the trailing ROTR undoes --
+        * confirm against the branch displacement encoding.
+        */
+       ROTL(r0);
+       TST(_R0, _R0);
+       BFS(-4);
+       ADDI(_R0, -1);
+
+       ROTR(r0);
+}
+
+/*
+ * Rotate-right by a register count: value r1, count r2, result in r0.
+ *
+ * Mirror image of _lrotr(): the loop uses ROTR and the trailing
+ * correction is a ROTL.  _R0 is the loop counter, so neither r0 nor r1
+ * may be _R0.
+ */
+static void
+_rrotr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       movr(_R0, r2);
+       movr(r0, r1);
+
+       /* Loop body; the decrement follows the branch (presumably delay slot). */
+       ROTR(r0);
+       TST(_R0, _R0);
+       BFS(-4);
+       ADDI(_R0, -1);
+
+       ROTL(r0);
+}
+
+/*
+ * Rotate-right by a constant count i0 (0 .. __WORDSIZE - 1), result in r0.
+ *
+ * Small counts are unrolled: fewer than 6 steps to the right emit that
+ * many ROTRs, and counts within 6 steps of a full word emit the
+ * complementary number of ROTLs.  Everything else loads the count into
+ * _R0 and reuses rrotr, which is why r0 must not be _R0.
+ */
+static void
+_rroti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+       unsigned int i;
+
+       assert(i0 >= 0 && i0 <= __WORDSIZE - 1);
+       assert(r0 != _R0);
+
+       movr(r0, r1);
+
+       if (i0 < 6) {
+               for (i = 0; i < i0; i++)
+                       ROTR(r0);
+       } else if (__WORDSIZE - i0 < 6) {
+               for (i = 0; i < __WORDSIZE - i0; i++)
+                       ROTL(r0);
+       } else {
+               movi(_R0, i0);
+               rrotr(r0, r0, _R0);
+       }
+}
+
+/*
+ * Bitwise AND of r1 and r2 into r0 using the two-operand AND opcode.
+ * When the destination already holds r2, r1 is ANDed into it in place;
+ * otherwise r1 is copied to r0 first and r2 is ANDed in.
+ */
+static void
+_andr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r0 != r2) {
+               movr(r0, r1);
+               AND(r0, r2);
+               return;
+       }
+
+       AND(r0, r1);
+}
+
+/*
+ * Bitwise AND of r1 with the constant i0, result in r0.
+ *
+ * Three masks get dedicated sequences: 0xff uses extr_uc, 0xffff uses
+ * extr_us, and 0xffff0000 uses SWAPW followed by SHLL16.  Otherwise the
+ * constant is loaded into r0 when it is distinct from r1, or into _R0
+ * when the operation is in place (so r0 must not be _R0 in that case).
+ */
+static void
+_andi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (i0 == 0xff) {
+               extr_uc(r0, r1);
+       } else if (i0 == 0xffff) {
+               extr_us(r0, r1);
+       } else if (i0 == 0xffff0000) {
+               SWAPW(r0, r1);
+               SHLL16(r0);
+       } else if (r0 != r1) {
+               movi(r0, i0);
+               AND(r0, r1);
+       } else {
+               assert(r0 != _R0);
+
+               movi(_R0, i0);
+               AND(r0, _R0);
+       }
+}
+
+/*
+ * Bitwise OR of r1 and r2 into r0 using the two-operand OR opcode.
+ * When the destination already holds r2, r1 is ORed into it in place;
+ * otherwise r1 is copied to r0 first and r2 is ORed in.
+ */
+static void
+_orr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r0 != r2) {
+               movr(r0, r1);
+               OR(r0, r2);
+               return;
+       }
+
+       OR(r0, r1);
+}
+
+/*
+ * Bitwise OR of r1 with the constant i0, result in r0.
+ * An in-place operation stages the constant in _R0 (so r0 must not be
+ * _R0); otherwise the constant is loaded straight into r0 and r1 is ORed
+ * in.
+ */
+static void
+_ori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (r0 == r1) {
+               assert(r0 != _R0);
+
+               movi(_R0, i0);
+               OR(r0, _R0);
+       } else {
+               movi(r0, i0);
+               OR(r0, r1);
+       }
+}
+
+/*
+ * Bitwise XOR of r1 and r2 into r0 using the two-operand XOR opcode.
+ * When the destination already holds r2, r1 is XORed into it in place;
+ * otherwise r1 is copied to r0 first and r2 is XORed in.
+ */
+static void
+_xorr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r0 != r2) {
+               movr(r0, r1);
+               XOR(r0, r2);
+               return;
+       }
+
+       XOR(r0, r1);
+}
+
+/*
+ * Bitwise XOR of r1 with the constant i0, result in r0.
+ *
+ * When the destination is _R0 and i0 has no bits above the low byte, the
+ * single-operand XORI form is used (it takes no register argument here,
+ * so presumably it operates on R0 -- confirm).  Otherwise the constant
+ * goes through r0, or through _R0 for an in-place operation.
+ */
+static void
+_xori(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (r0 == _R0 && !(i0 & ~0xff)) {
+               movr(r0, r1);
+               XORI(i0);
+       } else if (r0 != r1) {
+               movi(r0, i0);
+               XOR(r0, r1);
+       } else {
+               assert(r0 != _R0);
+
+               movi(_R0, i0);
+               XOR(r0, _R0);
+       }
+}
+
+/*
+ * Count-leading-ones emitter: r1 is copied to _R0, r0 starts at -1, and a
+ * SHLL / BTS / ADDI loop follows.
+ * Presumably each pass shifts the top bit of _R0 into T and bumps r0 (the
+ * ADDI sitting in the branch delay slot) while that bit is set -- confirm.
+ */
+static void _clor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       movr(_R0, r1);
+       movi(r0, -1);
+
+       SHLL(_R0);
+       BTS(-3);
+       ADDI(r0, 1);
+}
+
+/*
+ * Count-leading-zeros emitter: r1 is copied to _R0, r0 starts at -1, then
+ * SETT and a ROTCL / BFS / ADDI loop.
+ * Presumably the initial SETT feeds a sentinel 1 into the bottom of _R0 so
+ * that the loop terminates even for a zero input -- confirm.
+ */
+static void _clzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       movr(_R0, r1);
+       movi(r0, -1);
+
+       SETT();
+       ROTCL(_R0);
+       BFS(-3);
+       ADDI(r0, 1);
+}
+
+/*
+ * Count-trailing-ones emitter: same shape as _clor() but shifting right
+ * (SHLR) so the low bit of _R0 is examined on each pass.
+ * r0 starts at -1 and is incremented by the ADDI after the branch.
+ */
+static void _ctor(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       movr(_R0, r1);
+       movi(r0, -1);
+
+       SHLR(_R0);
+       BTS(-3);
+       ADDI(r0, 1);
+}
+
+/*
+ * Count-trailing-zeros emitter: same shape as _clzr() but rotating right
+ * through T (ROTCR) so the low bit of _R0 is examined on each pass.
+ * r0 starts at -1 and is incremented by the ADDI after the branch.
+ */
+static void _ctzr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       movr(_R0, r1);
+       movi(r0, -1);
+
+       SETT();
+       ROTCR(_R0);
+       BFS(-3);
+       ADDI(r0, 1);
+}
+
+/*
+ * Bit-reversal emitter: r1 is copied to _R0, then a loop moves one bit
+ * per pass out of _R0 (ROTCR) and into r0 (ROTCL).
+ * Presumably the SETT seeds a sentinel bit and CMPEQI(1) detects when
+ * only the sentinel is left in _R0 -- confirm.
+ */
+static void
+_rbitr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       movr(_R0, r1);
+
+       SETT();
+       ROTCR(_R0);
+       ROTCL(r0);
+       CMPEQI(1);
+       /* Backward branch emitted directly with a fixed displacement of -6. */
+       emit_branch_opcode(_jit, -6, 0, 0, 0);
+}
+
+/*
+ * Population-count emitter: r1 is copied to _R0 (consumed by the loop)
+ * and r0 is the accumulator, so r0 must not be _R0.
+ * Each pass shifts one bit out of _R0 with SHLR and folds it into r0 via
+ * NEGC; the loop runs until _R0 tests as zero, with NEG(r0, r0) after the
+ * branch (presumably in its delay slot -- confirm).
+ */
+static void
+_popcntr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+       assert(r0 != _R0);
+
+       movr(_R0, r1);
+       movi(r0, 0);
+
+       SHLR(_R0);
+       NEGC(r0, r0);
+       TST(_R0, _R0);
+       BFS(-5);
+       NEG(r0, r0);
+}
+
+/* Signed greater-than: CMPGT(r1, r2), result materialised in r0 via MOVT. */
+static void
+_gtr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CMPGT(r1, r2);
+       MOVT(r0);
+}
+
+/* Unsigned greater-than: CMPHI(r1, r2), result materialised in r0 via MOVT. */
+static void
+_gtr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CMPHI(r1, r2);
+       MOVT(r0);
+}
+
+/* Signed greater-or-equal: CMPGE(r1, r2), result materialised in r0 via MOVT. */
+static void
+_ger(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CMPGE(r1, r2);
+       MOVT(r0);
+}
+
+/* Unsigned greater-or-equal: CMPHS(r1, r2), result materialised in r0 via MOVT. */
+static void
+_ger_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CMPHS(r1, r2);
+       MOVT(r0);
+}
+
+/* Equality: CMPEQ(r1, r2), result materialised in r0 via MOVT. */
+static void
+_eqr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       CMPEQ(r1, r2);
+       MOVT(r0);
+}
+
+/*
+ * Inequality: _R0 is preloaded with -1, CMPEQ(r1, r2) sets the flag, and
+ * NEGC(r0, _R0) produces the result.
+ * Presumably NEGC computes 0 - (-1) - T = 1 - T, i.e. the inverted
+ * equality flag -- confirm.  The inputs must not be _R0 because it is
+ * overwritten before the compare.
+ */
+static void
+_ner(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0 && r2 != _R0);
+
+       MOVI(_R0, -1);
+       CMPEQ(r1, r2);
+       NEGC(r0, _R0);
+}
+
+/*
+ * Equality against a constant, result materialised in r0 via MOVT.
+ *
+ * Zero is tested with TST(r1, r1); constants in [-128, 127] copy r1 into
+ * _R0 and use the immediate compare CMPEQI; anything else loads the
+ * constant into _R0 and uses the register compare.
+ */
+static void
+_eqi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (i0 == 0) {
+               TST(r1, r1);
+       } else if (i0 >= -128 && i0 < 128) {
+               assert(r1 != _R0);
+
+               movr(_R0, r1);
+               CMPEQI(i0);
+       } else {
+               assert(r1 != _R0);
+
+               movi(_R0, i0);
+               CMPEQ(r1, _R0);
+       }
+       MOVT(r0);
+}
+
+/*
+ * Inequality against a constant, result in r0.
+ *
+ * The compare is chosen exactly as in _eqi() (TST for zero, CMPEQI for
+ * constants in [-128, 127], CMPEQ via _R0 otherwise).  The flag is then
+ * inverted with the MOVI(_R0, -1) / NEGC(r0, _R0) pair, as in _ner().
+ * _R0 is clobbered on every path, hence the assert on both registers.
+ */
+static void
+_nei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       if (i0 == 0) {
+               TST(r1, r1);
+       } else if (i0 >= -128 && i0 < 128) {
+               movr(_R0, r1);
+               CMPEQI(i0);
+       } else {
+               movi(_R0, i0);
+               CMPEQ(r1, _R0);
+       }
+
+       MOVI(_R0, -1);
+       NEGC(r0, _R0);
+}
+
+/*
+ * Signed greater-than against a constant, result materialised in r0 via
+ * MOVT.  A zero constant uses the single-operand CMPPL; any other value
+ * is staged in _R0 (so r1 must not be _R0) and compared with CMPGT.
+ */
+static void
+_gti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (i0 != 0) {
+               assert(r1 != _R0);
+
+               movi(_R0, i0);
+               CMPGT(r1, _R0);
+       } else {
+               CMPPL(r1);
+       }
+
+       MOVT(r0);
+}
+
+/*
+ * Signed greater-or-equal against a constant, result materialised in r0
+ * via MOVT.  A zero constant uses the single-operand CMPPZ; any other
+ * value is staged in _R0 (so r1 must not be _R0) and compared with CMPGE.
+ */
+static void
+_gei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       if (i0 != 0) {
+               assert(r1 != _R0);
+
+               movi(_R0, i0);
+               CMPGE(r1, _R0);
+       } else {
+               CMPPZ(r1);
+       }
+
+       MOVT(r0);
+}
+
+/*
+ * Unsigned greater-than against a constant: i0 is staged in _R0 (so r1
+ * must not be _R0), compared with CMPHI(r1, _R0), result in r0 via MOVT.
+ */
+static void
+_gti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       movi(_R0, i0);
+       CMPHI(r1, _R0);
+       MOVT(r0);
+}
+
+/*
+ * Unsigned greater-or-equal against a constant: i0 is staged in _R0 (so
+ * r1 must not be _R0), compared with CMPHS(r1, _R0), result in r0 via MOVT.
+ */
+static void
+_gei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       movi(_R0, i0);
+       CMPHS(r1, _R0);
+       MOVT(r0);
+}
+
+/*
+ * Signed less-than against a constant, result materialised in r0 via MOVT.
+ *
+ * For a zero constant the value is copied into r0 and rotated left
+ * through T (presumably leaving the sign bit in T -- confirm).  For any
+ * other constant it is staged in _R0 and compared with the operands
+ * swapped: CMPGT(_R0, r1).
+ */
+static void
+_lti(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       if (i0 != 0) {
+               movi(_R0, i0);
+               CMPGT(_R0, r1);
+       } else {
+               movr(r0, r1);
+               ROTCL(r0);
+       }
+
+       /* Both paths finish by copying the flag into r0. */
+       MOVT(r0);
+}
+
+/*
+ * Signed less-or-equal against a constant: i0 is staged in _R0 and the
+ * compare is issued with swapped operands, CMPGE(_R0, r1); result in r0
+ * via MOVT.  r1 must not be _R0.
+ */
+static void
+_lei(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       movi(_R0, i0);
+       CMPGE(_R0, r1);
+       MOVT(r0);
+}
+
+/*
+ * Unsigned less-than against a constant: i0 is staged in _R0 and the
+ * compare is issued with swapped operands, CMPHI(_R0, r1); result in r0
+ * via MOVT.  r1 must not be _R0.
+ */
+static void
+_lti_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       movi(_R0, i0);
+       CMPHI(_R0, r1);
+       MOVT(r0);
+}
+
+/*
+ * Unsigned less-or-equal against a constant: i0 is staged in _R0 and the
+ * compare is issued with swapped operands, CMPHS(_R0, r1); result in r0
+ * via MOVT.  r1 must not be _R0.
+ */
+static void
+_lei_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       movi(_R0, i0);
+       CMPHS(_R0, r1);
+       MOVT(r0);
+}
+
+/*
+ * Shift r0 left in place by the count held in r1.
+ *
+ * When jit_sh34_p() holds, a single SHLD(r0, r1) is emitted.  Otherwise
+ * the count is copied into _R0 and a one-bit-per-pass loop is emitted.
+ * Presumably the TST/BTS pair skips the loop for a zero count and
+ * DT/BFS repeats SHLL (in the delay slot) until the counter hits zero
+ * -- confirm against the branch displacement encoding.
+ */
+static void
+emit_shllr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (jit_sh34_p())
+               SHLD(r0, r1);
+       else {
+               movr(_R0, r1);
+
+               TST(_R0, _R0);
+               BTS(2);
+               DT(_R0);
+               BFS(-3);
+               SHLL(r0);
+       }
+}
+
+/*
+ * Left shift of r1 by the count in r2, result in r0.
+ *
+ * If the destination is also the count register, the count is saved in
+ * _R0 before r0 is overwritten with the value (so r1 must not be _R0 in
+ * that case).  The actual shift is emitted by emit_shllr().
+ */
+static void
+_lshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (r0 == r2) {
+               assert(r1 != _R0);
+
+               movr(_R0, r2);
+               movr(r0, r1);
+               emit_shllr(_jit, r0, _R0);
+       } else {
+               movr(r0, r1);
+               emit_shllr(_jit, r0, r2);
+       }
+}
+
+/*
+ * Arithmetic right shift of r1 by the count in r2, result in r0.
+ *
+ * When jit_sh34_p() holds, the negated count is placed in _R0 and SHAD is
+ * used (presumably a negative SHAD count shifts right -- confirm).
+ * Otherwise _R0 holds the count for a TST/BTS/DT/BFS loop around SHAR,
+ * shaped like the one in emit_shllr().
+ * _R0 is clobbered on both paths, so r0 and r1 must not be _R0.
+ */
+static void
+_rshr(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       if (jit_sh34_p()) {
+               negr(_R0, r2);
+               movr(r0, r1);
+               SHAD(r0, _R0);
+       } else {
+               movr(_R0, r2);
+               movr(r0, r1);
+
+               TST(_R0, _R0);
+               BTS(2);
+               DT(_R0);
+               BFS(-3);
+               SHAR(r0);
+       }
+}
+
+/*
+ * Logical right shift of r1 by the count in r2, result in r0.
+ *
+ * Same structure as _rshr(), using SHLD with the negated count when
+ * jit_sh34_p() holds and a loop around SHLR otherwise.
+ * _R0 is clobbered on both paths, so r0 and r1 must not be _R0.
+ */
+static void
+_rshr_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r0 != _R0 && r1 != _R0);
+
+       if (jit_sh34_p()) {
+               negr(_R0, r2);
+               movr(r0, r1);
+               SHLD(r0, _R0);
+       } else {
+               movr(_R0, r2);
+               movr(r0, r1);
+
+               TST(_R0, _R0);
+               BTS(2);
+               DT(_R0);
+               BFS(-3);
+               SHLR(r0);
+       }
+}
+
+/*
+ * Left shift of r1 by the constant i0, result in r0.
+ *
+ * A zero count is just the move.  A count of 4 is two SHLL2s.  The mask
+ * has bit (i0 - 1) set for the counts 1, 2, 3, 8, 9, 10, 16, 17, 18 and
+ * 24, which are composed from at most one each of SHLL16, SHLL8, SHLL2
+ * and SHLL.  Any other count is loaded into a register and handed to
+ * lshr; that register is _R0 unless r0 itself is _R0, in which case a
+ * scratch register is borrowed.
+ */
+static void
+_lshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       jit_uint32_t reg, mask = 0x00838387;
+
+       movr(r0, r1);
+
+       if (i0 == 0)
+               return;
+
+       if (i0 == 4) {
+               SHLL2(r0);
+               SHLL2(r0);
+       } else if (mask & (1 << (i0 - 1))) {
+               if (i0 & 0x10)
+                       SHLL16(r0);
+               if (i0 & 0x8)
+                       SHLL8(r0);
+               if (i0 & 0x2)
+                       SHLL2(r0);
+               if (i0 & 0x1)
+                       SHLL(r0);
+       } else {
+               reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr);
+
+               movi(rn(reg), i0);
+               lshr(r0, r0, rn(reg));
+
+               /* Only a borrowed scratch register needs to be released. */
+               if (r0 == _R0)
+                       jit_unget_reg(reg);
+       }
+}
+
+/*
+ * Arithmetic right shift of r1 by the constant i0, result in r0.
+ *
+ * A zero count is just the register move and returns early, matching
+ * _lshi() and _rshi_u(); this also keeps a zero count away from the
+ * DT/BFS loop below, which is only guarded by an assert.
+ *
+ * The count lives in _R0 unless r0 itself is _R0, in which case a scratch
+ * register is borrowed.  When jit_sh34_p() holds, the negated count is
+ * fed to SHAD; otherwise a DT/BFS loop around SHAR is emitted.
+ */
+static void
+_rshi(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       jit_uint32_t reg;
+
+       if (i0 == 0) {
+               /* Nothing to shift: no count register needed at all. */
+               movr(r0, r1);
+               return;
+       }
+
+       reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr);
+
+       movr(r0, r1);
+       if (jit_sh34_p()) {
+               movi(rn(reg), -i0);
+               SHAD(r0, rn(reg));
+       } else {
+               assert(i0 > 0);
+               movi(rn(reg), i0);
+               DT(rn(reg));
+               BFS(-3);
+               SHAR(r0);
+       }
+
+       /* Only a borrowed scratch register needs to be released. */
+       if (r0 == _R0)
+               jit_unget_reg(reg);
+}
+
+/*
+ * Logical right shift of r1 by the constant i0, result in r0.
+ *
+ * Mirrors _lshi(): zero is just the move, 4 is two SHLR2s, and the counts
+ * selected by the mask (1, 2, 3, 8, 9, 10, 16, 17, 18, 24) are composed
+ * from SHLR16 / SHLR8 / SHLR2 / SHLR.  Other counts go through a register
+ * (_R0, or a borrowed scratch when r0 is _R0): SHLD with the negated
+ * count when jit_sh34_p() holds, or a DT/BFS loop around SHLR otherwise.
+ */
+static void
+_rshi_u(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       jit_uint32_t reg, mask = 0x00838387;
+
+       movr(r0, r1);
+
+       if (i0 == 0)
+               return;
+
+       if (i0 == 4) {
+               SHLR2(r0);
+               SHLR2(r0);
+       } else if (mask & (1 << (i0 - 1))) {
+               if (i0 & 0x10)
+                       SHLR16(r0);
+               if (i0 & 0x8)
+                       SHLR8(r0);
+               if (i0 & 0x2)
+                       SHLR2(r0);
+               if (i0 & 0x1)
+                       SHLR(r0);
+       } else {
+               reg = r0 != _R0 ? _R0 : jit_get_reg(jit_class_gpr);
+
+               if (jit_sh34_p()) {
+                       movi(rn(reg), -i0);
+                       SHLD(r0, rn(reg));
+               } else {
+                       movi(rn(reg), i0);
+                       DT(rn(reg));
+                       BFS(-3);
+                       SHLR(r0);
+               }
+
+               /* Only a borrowed scratch register needs to be released. */
+               if (r0 == _R0)
+                       jit_unget_reg(reg);
+       }
+}
+
+/*
+ * Double-word left shift (signed variant): value r2, count r3, results in
+ * r0 and r1, which must be distinct.
+ *
+ * The count is kept in _R0.  Both outputs start as copies of r2; r0 is
+ * cleared by the XOR unless the BF(0) skips it (presumably the XOR runs
+ * only when the count equals 32 -- confirm).  r0 is then shifted by the
+ * count and r1 by (count - __WORDSIZE), both with SHAD.
+ */
+static void
+_qlshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3)
+{
+       assert(r0 != r1);
+       movr(_R0, r3);
+       movr(r0, r2);
+       CMPEQI(32);
+       movr(r1, r2);
+       BF(0);
+       XOR(r0, r0);
+       SHAD(r0, _R0);
+       ADDI(_R0, -__WORDSIZE);
+       SHAD(r1, _R0);
+}
+
+/*
+ * Double-word left shift (unsigned variant): value r2, count r3, results
+ * in r0 and r1, which must be distinct.
+ *
+ * Identical to _qlshr() except that both shifts use SHLD instead of SHAD.
+ */
+static void
+_qlshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+         jit_int32_t r2, jit_int32_t r3)
+{
+       assert(r0 != r1);
+       movr(_R0, r3);
+       movr(r0, r2);
+       CMPEQI(32);
+       movr(r1, r2);
+       BF(0);
+       XOR(r0, r0);
+       SHLD(r0, _R0);
+       ADDI(_R0, -__WORDSIZE);
+       SHLD(r1, _R0);
+}
+
+/*
+ * Double-word left shift of r2 by the constant i0: r0 receives r2 << i0
+ * and r1 receives the bits shifted out, taken from r2 with a signed or
+ * unsigned right shift depending on `sign`.
+ *
+ * i0 == 0:          r0 = r2; r1 = r2 >> (__WORDSIZE - 1) if signed, else 0.
+ * i0 == __WORDSIZE: r1 = r2; r0 = 0.
+ * otherwise:        r1 = r2 >> (__WORDSIZE - i0); r0 = r2 << i0.
+ */
+static void
+_xlshi(jit_state_t *_jit, jit_bool_t sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+       if (i0 == 0) {
+               movr(r0, r2);
+               if (!sign) {
+                       movi(r1, 0);
+               } else {
+                       rshi(r1, r2, __WORDSIZE - 1);
+               }
+               return;
+       }
+
+       if (i0 == __WORDSIZE) {
+               movr(r1, r2);
+               movi(r0, 0);
+               return;
+       }
+
+       assert((jit_uword_t)i0 <= __WORDSIZE);
+
+       /* The right shift is emitted first, then the left shift into r0. */
+       if (!sign) {
+               rshi_u(r1, r2, __WORDSIZE - i0);
+       } else {
+               rshi(r1, r2, __WORDSIZE - i0);
+       }
+       lshi(r0, r2, i0);
+}
+
+/*
+ * Double-word right shift (signed variant): value r2, count r3, results
+ * in r0 and r1, which must be distinct.
+ *
+ * _R0 holds the negated count.  Both outputs start as copies of r2; r1 is
+ * overwritten with _R0 by the MOV unless the BF(0) skips it (presumably
+ * the MOV runs only when the count is zero, making r1 zero -- confirm).
+ * r0 is then shifted by -count and r1 by (__WORDSIZE - count) with SHAD.
+ */
+static void
+_qrshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3)
+{
+       assert(r0 != r1);
+       NEG(_R0, r3);
+       movr(r1, r2);
+       CMPEQI(0);
+       movr(r0, r2);
+       BF(0);
+       MOV(r1, _R0);
+       SHAD(r0, _R0);
+       ADDI(_R0, __WORDSIZE);
+       SHAD(r1, _R0);
+}
+
+/*
+ * Double-word right shift (unsigned variant): value r2, count r3, results
+ * in r0 and r1, which must be distinct.
+ *
+ * Identical to _qrshr() except that both shifts use SHLD instead of SHAD.
+ */
+static void
+_qrshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+         jit_int32_t r2, jit_int32_t r3)
+{
+       assert(r0 != r1);
+       NEG(_R0, r3);
+       movr(r1, r2);
+       CMPEQI(0);
+       movr(r0, r2);
+       BF(0);
+       MOV(r1, _R0);
+       SHLD(r0, _R0);
+       ADDI(_R0, __WORDSIZE);
+       SHLD(r1, _R0);
+}
+
+/*
+ * Double-word right shift of r2 by the constant i0: r0 receives r2 >> i0
+ * (signed or unsigned depending on `sign`) and r1 receives the bits
+ * shifted out, positioned with a left shift.
+ *
+ * i0 == 0:          r0 = r2; r1 = 0.
+ * i0 == __WORDSIZE: r1 = r2; r0 = r2 >> (__WORDSIZE - 1) if signed, else 0.
+ * otherwise:        r1 = r2 << (__WORDSIZE - i0); r0 = r2 >> i0.
+ */
+static void
+_xrshi(jit_state_t *_jit, jit_bool_t sign,
+       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_word_t i0)
+{
+       if (i0 == 0) {
+               movr(r0, r2);
+               movi(r1, 0);
+               return;
+       }
+
+       if (i0 == __WORDSIZE) {
+               movr(r1, r2);
+               if (!sign) {
+                       movi(r0, 0);
+               } else {
+                       rshi(r0, r2, __WORDSIZE - 1);
+               }
+               return;
+       }
+
+       assert((jit_uword_t)i0 <= __WORDSIZE);
+
+       /* The left shift into r1 is emitted first, then the right shift. */
+       lshi(r1, r2, __WORDSIZE - i0);
+       if (!sign) {
+               rshi_u(r0, r2, i0);
+       } else {
+               rshi(r0, r2, i0);
+       }
+}
+
+/* Unsigned byte load from the address in r1: ldr_c followed by extr_uc on r0. */
+static void _ldr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       ldr_c(r0, r1);
+       extr_uc(r0, r0);
+}
+
+/* Unsigned half-word load from the address in r1: ldr_s followed by extr_us on r0. */
+static void _ldr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       ldr_s(r0, r1);
+       extr_us(r0, r0);
+}
+
+/* Byte load from the absolute address i0: the address is staged in _R0, then ldr_c. */
+static void _ldi_c(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_c(r0, _R0);
+}
+
+/* Half-word load from the absolute address i0: the address is staged in _R0, then ldr_s. */
+static void _ldi_s(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_s(r0, _R0);
+}
+
+/* Word load from the absolute address i0: the address is staged in _R0, then ldr_i. */
+static void _ldi_i(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_i(r0, _R0);
+}
+
+/* Unsigned byte load from the absolute address i0: the address is staged in _R0, then ldr_uc. */
+static void _ldi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_uc(r0, _R0);
+}
+
+/* Unsigned half-word load from the absolute address i0: the address is staged in _R0, then ldr_us. */
+static void _ldi_us(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_us(r0, _R0);
+}
+
+/*
+ * Indexed byte load (base r1, index r2) into r0.
+ * The index is copied into _R0 and LDRB(r0, r1) is emitted (presumably
+ * the R0-indexed addressing form -- confirm); r1 must therefore not be
+ * _R0.
+ */
+static void
+_ldxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0);
+
+       movr(_R0, r2);
+       LDRB(r0, r1);
+}
+
+/*
+ * Indexed half-word load (base r1, index r2) into r0.
+ * The index is copied into _R0 and LDRW(r0, r1) is emitted; r1 must not
+ * be _R0.
+ */
+static void
+_ldxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0);
+
+       movr(_R0, r2);
+       LDRW(r0, r1);
+}
+
+/*
+ * Indexed word load (base r1, index r2) into r0.
+ * The index is copied into _R0 and LDRL(r0, r1) is emitted; r1 must not
+ * be _R0.
+ */
+static void
+_ldxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0);
+
+       movr(_R0, r2);
+       LDRL(r0, r1);
+}
+
+/* Unsigned indexed byte load: ldxr_c followed by extr_uc on the result. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       ldxr_c(r0, r1, r2);
+       extr_uc(r0, r0);
+}
+
+/* Unsigned indexed half-word load: ldxr_s followed by extr_us on the result. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       ldxr_s(r0, r1, r2);
+       extr_us(r0, r0);
+}
+
+/*
+ * Byte load from base r1 plus constant offset i0, result in r0.
+ *
+ * - Base is _GBR and the offset is in 0..0xff: GBRLDB, whose result is
+ *   then copied from _R0 into r0.
+ * - Base is _GBR with any other offset: the base is first copied into r0
+ *   and the function recurses with r0 as the base.
+ * - Ordinary base with an offset in 0..0xf: LDDB, result copied from _R0.
+ * - Otherwise the offset is loaded into _R0 and ldxr_c is used.
+ */
+static void
+_ldxi_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       if (r1 == _GBR) {
+               if (i0 >= 0 && i0 <= 0xff) {
+                       GBRLDB(i0);
+                       movr(r0, _R0);
+               } else {
+                       movr(r0, r1);
+                       ldxi_c(r0, r0, i0);
+               }
+       } else if (i0 >= 0 && i0 <= 0xf) {
+               LDDB(r1, i0);
+               movr(r0, _R0);
+       } else {
+               movi(_R0, i0);
+               ldxr_c(r0, r1, _R0);
+       }
+}
+
+/*
+ * Half-word load from base r1 plus constant offset i0, result in r0.
+ *
+ * - Base is _GBR and the offset is even and in 0..0x1ff: GBRLDW with the
+ *   offset scaled down by 2, result copied from _R0.
+ * - Base is _GBR with any other offset: the base is first copied into r0
+ *   and the function recurses with r0 as the base.
+ * - Ordinary base with an even offset in 0..0x1f: LDDW with the scaled
+ *   offset, result copied from _R0.
+ * - Otherwise the offset is loaded into _R0 and ldxr_s is used.
+ */
+static void
+_ldxi_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       if (r1 == _GBR) {
+               if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) {
+                       GBRLDW(i0 >> 1);
+                       movr(r0, _R0);
+               } else {
+                       movr(r0, r1);
+                       ldxi_s(r0, r0, i0);
+               }
+       } else if (i0 >= 0 && i0 <= 0x1f && !(i0 & 0x1)) {
+               LDDW(r1, i0 >> 1);
+               movr(r0, _R0);
+       } else {
+               movi(_R0, i0);
+               ldxr_s(r0, r1, _R0);
+       }
+}
+
+/*
+ * Word load from base r1 plus constant offset i0, result in r0.
+ *
+ * - Base is _GBR and the offset is 4-aligned and in 0..0x3ff: GBRLDL with
+ *   the offset scaled down by 4, result copied from _R0.
+ * - Base is _GBR with any other offset: the base is first copied into r0
+ *   and the function recurses with r0 as the base.
+ * - Ordinary base with a 4-aligned offset in 0..0x3f: LDDL, which takes
+ *   the destination register directly.
+ * - Otherwise the offset is loaded into _R0 and ldxr_i is used.
+ */
+static void
+_ldxi_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       if (r1 == _GBR) {
+               if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) {
+                       GBRLDL(i0 >> 2);
+                       movr(r0, _R0);
+               } else {
+                       movr(r0, r1);
+                       ldxi_i(r0, r0, i0);
+               }
+       } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 0x3)) {
+               LDDL(r0, r1, i0 >> 2);
+       } else {
+               movi(_R0, i0);
+               ldxr_i(r0, r1, _R0);
+       }
+}
+
+/*
+ * Unsigned byte load from base r1 plus constant offset i0: the signed
+ * load targets _R0 and extr_uc moves the zero-extended value into r0.
+ *
+ * NOTE(review): with r1 == _GBR and an offset outside 0..0xff, ldxi_c
+ * copies the base into its destination (_R0 here) and recurses with that
+ * register as the base, which its own assert(r1 != _R0) appears to
+ * reject -- confirm whether that combination can be reached.
+ */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       ldxi_c(_R0, r1, i0);
+       extr_uc(r0, _R0);
+}
+
+/*
+ * Unsigned half-word load from base r1 plus constant offset i0: the
+ * signed load targets _R0 and extr_us moves the zero-extended value into
+ * r0.
+ *
+ * NOTE(review): with r1 == _GBR and an offset that ldxi_s cannot encode
+ * for GBR, ldxi_s copies the base into its destination (_R0 here) and
+ * recurses with that register as the base, which its own
+ * assert(r1 != _R0) appears to reject -- confirm whether that
+ * combination can be reached.
+ */
+static void
+_ldxi_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       assert(r1 != _R0);
+
+       ldxi_s(_R0, r1, i0);
+       extr_us(r0, _R0);
+}
+
+/*
+ * Byte load with post-update of the base register r1 by i0.
+ * An increment of exactly 1 maps onto the single LDBU opcode; every other
+ * increment is handed to generic_ldxai_c.
+ */
+static void
+_ldxai_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 1) {
+        generic_ldxai_c(r0, r1, i0);
+        return;
+    }
+
+    LDBU(r0, r1);
+}
+
+/*
+ * Unsigned byte load with post-update of the base register r1 by i0.
+ * The load itself is the signed one (LDBU for an increment of 1,
+ * generic_ldxai_c otherwise); the result is then passed through extr_uc.
+ */
+static void
+_ldxai_uc(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 1) {
+        generic_ldxai_c(r0, r1, i0);
+    } else {
+        LDBU(r0, r1);
+    }
+
+    extr_uc(r0, r0);
+}
+
+/*
+ * Half-word load with post-update of the base register r1 by i0.
+ * An increment of exactly 2 maps onto the single LDWU opcode; every other
+ * increment is handed to generic_ldxai_s.
+ */
+static void
+_ldxai_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 2) {
+        generic_ldxai_s(r0, r1, i0);
+        return;
+    }
+
+    LDWU(r0, r1);
+}
+
+/*
+ * Unsigned half-word load with post-update of the base register r1 by i0.
+ * The load itself is the signed one (LDWU for an increment of 2,
+ * generic_ldxai_s otherwise); the result is then passed through extr_us.
+ */
+static void
+_ldxai_us(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 2) {
+        generic_ldxai_s(r0, r1, i0);
+    } else {
+        LDWU(r0, r1);
+    }
+
+    extr_us(r0, r0);
+}
+
+/*
+ * Word load with post-update of the base register r1 by i0.
+ * An increment of exactly 4 maps onto the single LDLU opcode; every other
+ * increment is handed to generic_ldxai_i.
+ */
+static void
+_ldxai_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+    if (i0 != 4) {
+        generic_ldxai_i(r0, r1, i0);
+        return;
+    }
+
+    LDLU(r0, r1);
+}
+
+/*
+ * Byte store of r0 to the absolute address i0.  The address is staged in
+ * _R0, so the value register must not be _R0.
+ */
+static void _sti_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+       assert(r0 != _R0);
+
+       movi(_R0, i0);
+       str_c(_R0, r0);
+}
+
+/*
+ * Half-word store of r0 to the absolute address i0.  The address is
+ * staged in _R0, so the value register must not be _R0.
+ */
+static void _sti_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+       assert(r0 != _R0);
+
+       movi(_R0, i0);
+       str_s(_R0, r0);
+}
+
+/*
+ * Word store of r0 to the absolute address i0.  The address is staged in
+ * _R0, so the value register must not be _R0.
+ */
+static void _sti_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+       assert(r0 != _R0);
+
+       movi(_R0, i0);
+       str_i(_R0, r0);
+}
+
+/*
+ * Indexed byte store.  r0 is copied into _R0 and STRB(r1, r2) is emitted
+ * (presumably the R0-indexed form, storing r2 at r1 + _R0 -- confirm).
+ * r1 and r2 must not be _R0 since it is overwritten first.
+ */
+static void
+_stxr_c(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0 && r2 != _R0);
+
+       movr(_R0, r0);
+       STRB(r1, r2);
+}
+
+/*
+ * Indexed half-word store.  r0 is copied into _R0 and STRW(r1, r2) is
+ * emitted.  r1 and r2 must not be _R0 since it is overwritten first.
+ */
+static void
+_stxr_s(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0 && r2 != _R0);
+
+       movr(_R0, r0);
+       STRW(r1, r2);
+}
+
+/*
+ * Indexed word store.  r0 is copied into _R0 and STRL(r1, r2) is emitted.
+ * r1 and r2 must not be _R0 since it is overwritten first.
+ */
+static void
+_stxr_i(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       assert(r1 != _R0 && r2 != _R0);
+
+       movr(_R0, r0);
+       STRL(r1, r2);
+}
+
+/*
+ * Byte store of r1 at base r0 plus constant offset i0.
+ *
+ * - Base is _GBR and the offset is in 0..0xff: the value is copied into
+ *   _R0 and GBRSTB is emitted.
+ * - Base is _GBR with any other offset: the base is copied into a scratch
+ *   register and the function recurses with that register as the base.
+ * - Otherwise the offset is loaded into _R0 and stxr_c is used, so
+ *   neither r0 nor r1 may be _R0.
+ */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       jit_uint32_t reg;
+
+       if (r0 == _GBR) {
+               if (i0 >= 0 && i0 <= 0xff) {
+                       movr(_R0, r1);
+                       GBRSTB(i0);
+               } else {
+                       reg = jit_get_reg(jit_class_gpr);
+                       movr(rn(reg), r0);
+                       stxi_c(i0, rn(reg), r1);
+                       jit_unget_reg(reg);
+               }
+       } else {
+               assert(r0 != _R0 && r1 != _R0);
+
+               movi(_R0, i0);
+               stxr_c(_R0, r0, r1);
+       }
+}
+
+/*
+ * Half-word store of r1 at base r0 plus constant offset i0.
+ *
+ * - Base is _GBR and the offset is even and in 0..0x1ff: the value is
+ *   copied into _R0 and GBRSTW is emitted with the offset scaled by 2.
+ * - Base is _GBR with any other offset: the base is copied into a scratch
+ *   register and the function recurses with that register as the base.
+ * - Otherwise the offset is loaded into _R0 and stxr_s is used, so
+ *   neither r0 nor r1 may be _R0.
+ */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       jit_uint32_t reg;
+
+       if (r0 == _GBR) {
+               if (i0 >= 0 && i0 <= 0x1ff && !(i0 & 0x1)) {
+                       movr(_R0, r1);
+                       GBRSTW(i0 >> 1);
+               } else {
+                       reg = jit_get_reg(jit_class_gpr);
+                       movr(rn(reg), r0);
+                       stxi_s(i0, rn(reg), r1);
+                       jit_unget_reg(reg);
+               }
+       } else {
+               assert(r0 != _R0 && r1 != _R0);
+
+               movi(_R0, i0);
+               stxr_s(_R0, r0, r1);
+       }
+}
+
+/*
+ * Word store of r1 at base r0 plus constant offset i0.
+ *
+ * - Base is _GBR and the offset is 4-aligned and in 0..0x3ff: the value
+ *   is copied into _R0 and GBRSTL is emitted with the offset scaled by 4.
+ * - Base is _GBR with any other offset: the base is copied into a scratch
+ *   register and the function recurses with that register as the base.
+ * - Ordinary base with a 4-aligned offset in 0..0x3f: a single STDL with
+ *   the scaled offset; _R0 is not touched on this path.
+ * - Otherwise the offset is loaded into _R0 and stxr_i is used, so
+ *   neither r0 nor r1 may be _R0.
+ */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       jit_uint32_t reg;
+
+       if (r0 == _GBR) {
+               if (i0 >= 0 && i0 <= 0x3ff && !(i0 & 0x3)) {
+                       movr(_R0, r1);
+                       GBRSTL(i0 >> 2);
+               } else {
+                       reg = jit_get_reg(jit_class_gpr);
+                       movr(rn(reg), r0);
+                       stxi_i(i0, rn(reg), r1);
+                       jit_unget_reg(reg);
+               }
+       } else if (i0 >= 0 && i0 <= 0x3f && !(i0 & 3)) {
+               STDL(r0, r1, i0 >> 2);
+       } else {
+               assert(r0 != _R0 && r1 != _R0);
+
+               movi(_R0, i0);
+               stxr_i(_R0, r0, r1);
+       }
+}
+
+/*
+ * Byte store with pre-update of the base register r0 by i0.
+ * An adjustment of exactly -1 maps onto the single STBU opcode; every
+ * other value is handed to generic_stxbi_c.
+ */
+static void
+_stxbi_c(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (i0 != -1) {
+               generic_stxbi_c(i0, r0, r1);
+               return;
+       }
+
+       STBU(r0, r1);
+}
+
+/*
+ * Half-word store with pre-update of the base register r0 by i0.
+ * An adjustment of exactly -2 maps onto the single STWU opcode; every
+ * other value is handed to generic_stxbi_s.
+ */
+static void
+_stxbi_s(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (i0 != -2) {
+               generic_stxbi_s(i0, r0, r1);
+               return;
+       }
+
+       STWU(r0, r1);
+}
+
+/*
+ * Word store with pre-update of the base register r0 by i0.
+ * An adjustment of exactly -4 maps onto the single STLU opcode; every
+ * other value is handed to generic_stxbi_i.
+ */
+static void
+_stxbi_i(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (i0 != -4) {
+               generic_stxbi_i(i0, r0, r1);
+               return;
+       }
+
+       STLU(r0, r1);
+}
+
+/*
+ * Conditional branch on a signed CMPGE(r0, r1).
+ *
+ * The FPU mode is first reset to SH_DEFAULT_FPU_MODE.  `t` and `p` are
+ * forwarded unchanged to emit_branch_opcode() (presumably the T-bit
+ * polarity to branch on and a "patchable" flag -- confirm).
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t t, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       CMPGE(r0, r1);
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, t, p);
+
+       return (w);
+}
+
+/*
+ * Conditional branch on an unsigned CMPHS(r0, r1).
+ *
+ * Same structure as _bger(): the FPU mode is reset, the compare is
+ * emitted, and `t` / `p` are forwarded to emit_branch_opcode().
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+       jit_uint16_t r1, jit_bool_t t, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       CMPHS(r0, r1);
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, t, p);
+
+       return (w);
+}
+
+/*
+ * Branch to i0 when r0 equals r1.
+ *
+ * The FPU mode is first reset to SH_DEFAULT_FPU_MODE.  Distinct registers
+ * get a CMPEQ followed by a branch emitted with the flag argument set to
+ * 1.  Comparing a register with itself always succeeds, so that case
+ * degenerates into an unconditional jump: jmpi_p() when `p` is set,
+ * _jmpi() otherwise.
+ * Returns the address of the emitted branch (or the value returned by the
+ * jump helper).
+ */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (r0 != r1) {
+               CMPEQ(r0, r1);
+               w = _jit->pc.w;
+               emit_branch_opcode(_jit, i0, w, 1, p);
+               return (w);
+       }
+
+       if (p) {
+               w = jmpi_p(i0);
+       } else {
+               w = _jmpi(_jit, i0, i0 == 0);
+       }
+
+       return (w);
+}
+
+/*
+ * Branch to i0 when r0 differs from r1: CMPEQ followed by a branch
+ * emitted with the flag argument set to 0.
+ * The FPU mode is first reset to SH_DEFAULT_FPU_MODE.  Returns the code
+ * address captured immediately before the branch is emitted.
+ */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       CMPEQ(r0, r1);
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, 0, p);
+
+       return (w);
+}
+
+/*
+ * Branch on "mask set": TST(r0, r1), then a branch emitted with the flag
+ * argument starting at 0.
+ *
+ * When both operands are the same register the test is delegated to
+ * maybe_emit_tst(), which receives a pointer to `set` and may therefore
+ * adjust it (its definition is not visible here).
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+       jit_bool_t set = 0;
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (r0 != r1)
+               TST(r0, r1);
+       else
+               maybe_emit_tst(_jit, r0, &set);
+
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, set, p);
+
+       return (w);
+}
+
+/*
+ * Branch on "mask clear": identical to _bmsr() except that the flag
+ * argument passed to emit_branch_opcode() starts at 1.
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_uint16_t r1, jit_bool_t p)
+{
+       jit_bool_t set = 1;
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (r0 != r1)
+               TST(r0, r1);
+       else
+               maybe_emit_tst(_jit, r0, &set);
+
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, set, p);
+
+       return (w);
+}
+
+/*
+ * Conditional branch on a signed greater-than compare of r0 against the
+ * constant i1.
+ *
+ * A zero constant uses the single-operand CMPPL; anything else is staged
+ * in _R0 (so r0 must not be _R0) and compared with CMPGT.  `set` and `p`
+ * are forwarded unchanged to emit_branch_opcode().
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (i1 == 0) {
+               CMPPL(r0);
+       } else {
+               assert(r0 != _R0);
+
+               movi(_R0, i1);
+               CMPGT(r0, _R0);
+       }
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, set, p);
+
+       return (w);
+}
+
+/*
+ * Conditional branch on a signed greater-or-equal compare of r0 against
+ * the constant i1.
+ *
+ * A zero constant uses the single-operand CMPPZ; anything else is staged
+ * in _R0 (so r0 must not be _R0) and compared with CMPGE.  `set` and `p`
+ * are forwarded unchanged to emit_branch_opcode().
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+      jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (i1 == 0) {
+               CMPPZ(r0);
+       } else {
+               assert(r0 != _R0);
+
+               movi(_R0, i1);
+               CMPGE(r0, _R0);
+       }
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, set, p);
+
+       return (w);
+}
+
+/*
+ * Conditional branch on an unsigned greater-than compare of r0 against
+ * the constant i1.
+ *
+ * A non-zero constant is staged in _R0 (so r0 must not be _R0) and
+ * compared with CMPHI.  For a zero constant the flag is produced by
+ * maybe_emit_tst(), which is handed a pointer to `set`.
+ *
+ * NOTE(review): on the i1 == 0 path the flag comes from a test rather
+ * than from CMPHI; whether `set` keeps the right polarity there depends
+ * on maybe_emit_tst(), which is not visible here -- confirm.
+ *
+ * Returns the code address captured immediately before the branch is
+ * emitted.
+ */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+       jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (i1 == 0) {
+               maybe_emit_tst(_jit, r0, &set);
+       } else {
+               assert(r0 != _R0);
+
+               movi(_R0, i1);
+               CMPHI(r0, _R0);
+       }
+       w = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, w, set, p);
+
+       return (w);
+}
+
+/*
+ * Load the immediate i1 into R0, emit CMPHS of r0 against R0, then a
+ * conditional branch to i0 with the caller-supplied 'set' and 'p' flags.
+ *
+ * r0 must not be R0, since R0 holds the immediate.
+ * Returns the code address captured just before the branch is emitted.
+ */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+       jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       assert(r0 != _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       /* Operand setup and compare */
+       movi(_R0, i1);
+       CMPHS(r0, _R0);
+
+       /* Branch */
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Emit an equality compare of r0 against the immediate i1, then a
+ * conditional branch to i0.  Three encodings are used:
+ *   - i1 == 0: delegate to maybe_emit_tst(), which may update 'set';
+ *   - i1 in [-128, 127]: copy r0 into R0 and use CMPEQI with the immediate;
+ *   - otherwise: load i1 into R0 and use CMPEQ (r0 must not be R0).
+ *
+ * Returns the code address captured just before the branch is emitted.
+ */
+static jit_word_t _beqi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                       jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       if (i1 == 0) {
+               maybe_emit_tst(_jit, r0, &set);
+       } else if (i1 < -128 || i1 > 127) {
+               /* Immediate does not fit the short compare form */
+               assert(r0 != _R0);
+
+               movi(_R0, i1);
+               CMPEQ(_R0, r0);
+       } else {
+               movr(_R0, r0);
+               CMPEQI(i1);
+       }
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Load the mask i1 into R0, emit TST of R0 against r0, then a conditional
+ * branch to i0 with the caller-supplied 'set' and 'p' flags.
+ *
+ * r0 must not be R0, since R0 holds the mask.
+ * Returns the code address captured just before the branch is emitted.
+ */
+static jit_word_t _bmsi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                       jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       assert(r0 != _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       /* Mask setup and test */
+       movi(_R0, i1);
+       TST(_R0, r0);
+
+       /* Branch */
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Emit ADDV on (r0, r1) followed by a conditional branch to i0 with the
+ * caller-supplied 'set' and 'p' flags.
+ *
+ * Returns the code address captured just before the branch is emitted.
+ */
+static jit_word_t _boaddr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                         jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       /* Arithmetic whose outcome drives the branch */
+       ADDV(r0, r1);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Emit CLRT then ADDC on (r0, r1), followed by a conditional branch to i0
+ * with the caller-supplied 'set' and 'p' flags.
+ *
+ * Returns the code address captured just before the branch is emitted.
+ */
+static jit_word_t _boaddr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                           jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       /* CLRT is emitted first, ahead of the add-with-carry */
+       CLRT();
+       ADDC(r0, r1);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Immediate form of _boaddr(): switch to the default FPU mode, load i1
+ * into R0 and forward to _boaddr() with R0 as the second operand.
+ *
+ * r0 must not be R0.  Returns whatever _boaddr() returns.
+ */
+static jit_word_t _boaddi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                         jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       assert(r0 != _R0);
+
+       movi(_R0, i1);
+
+       return _boaddr(_jit, i0, r0, _R0, set, p);
+}
+
+/*
+ * Immediate form of _boaddr_u(): switch to the default FPU mode, load i1
+ * into R0 and forward to _boaddr_u() with R0 as the second operand.
+ *
+ * r0 must not be R0.  Returns whatever _boaddr_u() returns.
+ */
+static jit_word_t _boaddi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                           jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       assert(r0 != _R0);
+
+       movi(_R0, i1);
+
+       return _boaddr_u(_jit, i0, r0, _R0, set, p);
+}
+
+/*
+ * Emit NEG of r1 into R0 and then ADDV on (r0, R0), followed by a
+ * conditional branch to i0 with the caller-supplied 'set' and 'p' flags.
+ *
+ * r0 must not be R0, since R0 receives the negated operand.
+ * Returns the code address captured just before the branch is emitted.
+ *
+ * NOTE(review): negate-then-add may not flag overflow exactly like a
+ * direct subtract when r1 holds the most negative value - confirm.
+ */
+static jit_word_t _bosubr(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                         jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       assert(r0 != _R0);
+
+       /* Subtraction expressed as addition of the negated operand */
+       NEG(_R0, r1);
+       ADDV(r0, _R0);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Emit CLRT then SUBC on (r0, r1), followed by a conditional branch to i0
+ * with the caller-supplied 'set' and 'p' flags.
+ *
+ * Returns the code address captured just before the branch is emitted.
+ */
+static jit_word_t _bosubr_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                           jit_uint16_t r1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       /* CLRT is emitted first, ahead of the subtract-with-carry */
+       CLRT();
+       SUBC(r0, r1);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return (branch_pc);
+}
+
+/*
+ * Immediate form of _bosubr(): load i1 into R0 and forward to _bosubr()
+ * with R0 as the second operand.
+ *
+ * The FPU mode is switched *before* R0 is loaded, as the other immediate
+ * branch helpers in this file do (_boaddi, _bmsi, _bgei_u).  _bosubr()
+ * calls set_fmode() itself; doing it here first keeps any mode-switch code
+ * from being emitted after R0 already holds the immediate.
+ * NOTE(review): the existence of set_fmode_no_r0() suggests set_fmode()
+ * may use R0 as scratch - confirm.
+ *
+ * r0 must not be R0.  Returns whatever _bosubr() returns.
+ */
+static jit_word_t _bosubi(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                         jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t w;
+
+       assert(r0 != _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       movi(_R0, i1);
+       w = _bosubr(_jit, i0, r0, _R0, set, p);
+
+       return (w);
+}
+
+/*
+ * Immediate form of _bosubr_u(): load i1 into R0 and forward to
+ * _bosubr_u() with R0 as the second operand.
+ *
+ * The FPU mode is switched *before* R0 is loaded, as the other immediate
+ * branch helpers in this file do (_boaddi_u, _bmsi, _bgei_u).  _bosubr_u()
+ * calls set_fmode() itself; doing it here first keeps any mode-switch code
+ * from being emitted after R0 already holds the immediate.
+ * NOTE(review): the existence of set_fmode_no_r0() suggests set_fmode()
+ * may use R0 as scratch - confirm.
+ *
+ * r0 must not be R0.  Returns whatever _bosubr_u() returns.
+ */
+static jit_word_t _bosubi_u(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+                           jit_word_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t w;
+
+       assert(r0 != _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       movi(_R0, i1);
+       w = _bosubr_u(_jit, i0, r0, _R0, set, p);
+
+       return (w);
+}
+
+/*
+ * Emit an indirect jump through register r0: switch to the default FPU
+ * mode, emit JMP, then a NOP (presumably filling the branch delay slot).
+ */
+static void
+_jmpr(jit_state_t *_jit, jit_int16_t r0)
+{
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+       JMP(r0);
+       NOP();
+}
+
+/*
+ * Emit an unconditional jump to the address i0.
+ *
+ * A BRA with a relative displacement is used when 'force' is set or when
+ * the displacement lies in [-2048, 2046]; otherwise i0 is loaded into a
+ * temporary register and the jump goes through jmpr().
+ *
+ * Returns the code address captured after the FPU mode switch and before
+ * the jump sequence is emitted.
+ */
+static jit_word_t
+_jmpi(jit_state_t *_jit, jit_word_t i0, jit_bool_t force)
+{
+       jit_uint16_t reg;
+       jit_int32_t disp;
+       jit_word_t w;
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       w = _jit->pc.w;
+       /* '-' binds tighter than '>>': this is ((i0 - w) >> 1) - 2 */
+       disp = (i0 - w >> 1) - 2;
+
+       if (force || (disp >= -2048 && disp <= 2046)) {
+               /* With 'force' the range check is skipped entirely */
+               BRA(disp);
+               NOP();
+       } else if (0) {
+               /* Disabled placeholder: this branch is never taken */
+               /* TODO: BRAF */
+               reg = jit_get_reg(jit_class_gpr);
+
+               movi_p(rn(reg), disp - 7);
+               BRAF(rn(reg));
+               NOP();
+
+               jit_unget_reg(reg);
+       } else {
+               /* Out of BRA range: jump through a scratch register */
+               reg = jit_get_reg(jit_class_gpr);
+
+               movi(rn(reg), i0);
+               jmpr(rn(reg));
+
+               jit_unget_reg(reg);
+       }
+
+       return (w);
+}
+
+/*
+ * Emit an indirect call through register r0: JSR followed by a NOP
+ * (presumably filling the delay slot), bracketed by reset_fpu() calls.
+ */
+static void
+_callr(jit_state_t *_jit, jit_int16_t r0)
+{
+       /* no_r0 is set when the call target itself lives in R0 */
+       reset_fpu(_jit, r0 == _R0);
+
+       JSR(r0);
+       NOP();
+
+       reset_fpu(_jit, 1);
+}
+
+/*
+ * Emit a call to the address i0.
+ *
+ * A BSR with a relative displacement is used when the displacement lies in
+ * [-2048, 2046]; otherwise i0 is loaded into R0 and the call goes through
+ * JSR.  Either form is followed by a NOP (presumably filling the delay
+ * slot) and the sequence is bracketed by reset_fpu() calls.
+ */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+       jit_int32_t disp;
+       jit_word_t w;
+
+       reset_fpu(_jit, 0);
+
+       /* Displacement is measured from the call instruction itself */
+       w = _jit->pc.w;
+       disp = ((i0 - w) >> 1) - 2;
+
+       if (disp >= -2048 && disp <= 2046) {
+               BSR(disp);
+       } else {
+               movi(_R0, i0);
+               JSR(_R0);
+       }
+
+       NOP();
+       reset_fpu(_jit, 1);
+}
+
+/*
+ * Emit a constant load into r0 through load_const() and return the code
+ * address at which that sequence starts (presumably so that the value can
+ * be patched later).
+ *
+ * NOTE(review): i0 is not forwarded - load_const() is given a literal 0.
+ * Confirm that every caller patches the value afterwards and that this is
+ * intentional.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       jit_word_t w = _jit->pc.w;
+
+       load_const(1, r0, 0);
+
+       return (w);
+}
+
+/*
+ * Emit a jump whose target is loaded with movi_p() into a temporary
+ * register and then taken through jmpr().
+ *
+ * Returns the value returned by movi_p(), i.e. the address of the
+ * constant-load sequence.
+ */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_uint16_t reg;
+    jit_word_t w;
+
+    set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+    reg = jit_get_reg(jit_class_gpr);
+    w = movi_p(rn(reg), i0);
+    jmpr(rn(reg));
+    jit_unget_reg(reg);
+
+    return (w);
+}
+
+/*
+ * Emit a call whose target is loaded with movi_p() into a temporary
+ * register and then taken through JSR followed by a NOP.  The sequence is
+ * bracketed by reset_fpu() calls.
+ *
+ * Returns the value returned by movi_p(), i.e. the address of the
+ * constant-load sequence.
+ */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_uint16_t reg;
+    jit_word_t w;
+
+    reset_fpu(_jit, 0);
+
+    reg = jit_get_reg(jit_class_gpr);
+    w = movi_p(rn(reg), i0);
+    JSR(rn(reg));
+    NOP();
+    jit_unget_reg(reg);
+
+    reset_fpu(_jit, 1);
+
+    return (w);
+}
+
+/*
+ * Emit code that builds a jit_va_list_t for the current varargs function
+ * and leaves its address in r0.
+ *
+ * The structure lives at JIT_FP + vaoff, adjusted to an 8-byte boundary
+ * plus 4.  Its bgpr/egpr, bfpr/efpr and 'over' fields are filled with
+ * pointers computed from the function's vagp/vafp counts and self.size.
+ * One temporary GPR is used and released before returning.
+ */
+static void
+_vastart(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_int32_t reg;
+
+       assert(_jitc->function->self.call & jit_call_varargs);
+
+       /* Return jit_va_list_t in the register argument */
+       addi(r0, JIT_FP, _jitc->function->vaoff);
+       reg = jit_get_reg(jit_class_gpr);
+
+       /* Align pointer to 8 bytes with +4 bytes offset (so that the
+        * double values are aligned to 8 bytes) */
+       andi(r0, r0, -8);
+       addi(r0, r0, 4);
+
+       /* Initialize the gpr begin/end pointers */
+       addi(rn(reg), r0, sizeof(jit_va_list_t)
+            + _jitc->function->vagp * sizeof(jit_uint32_t));
+       stxi(offsetof(jit_va_list_t, bgpr), r0, rn(reg));
+
+       /* Advance from the begin pointer to the end of the word-argument area */
+       addi(rn(reg), rn(reg), NUM_WORD_ARGS * sizeof(jit_word_t)
+            - _jitc->function->vagp * sizeof(jit_uint32_t));
+       stxi(offsetof(jit_va_list_t, egpr), r0, rn(reg));
+
+       /* Initialize the fpr begin/end pointers */
+       if (_jitc->function->vafp)
+               addi(rn(reg), rn(reg), _jitc->function->vafp * sizeof(jit_float32_t));
+
+       stxi(offsetof(jit_va_list_t, bfpr), r0, rn(reg));
+       /* Advance from the begin pointer to the end of the float-argument area */
+       addi(rn(reg), rn(reg), NUM_FLOAT_ARGS * sizeof(jit_float32_t)
+            - _jitc->function->vafp * sizeof(jit_float32_t));
+       stxi(offsetof(jit_va_list_t, efpr), r0, rn(reg));
+
+       /* Initialize the stack pointer to the first stack argument */
+       addi(rn(reg), JIT_FP, _jitc->function->self.size);
+       stxi(offsetof(jit_va_list_t, over), r0, rn(reg));
+
+       jit_unget_reg(reg);
+}
+
+/*
+ * Emit code that fetches the next word-sized variadic argument into r0
+ * from the jit_va_list_t pointed to by r1, and advances the pointer that
+ * was used.
+ *
+ * R0 holds the offset of the va_list field being consumed (bgpr, or
+ * 'over' once the GPR area is exhausted); the same R0 value is reused at
+ * the end to store the updated pointer back into that field.  Two
+ * temporary GPRs are used and released before returning.
+ */
+static void
+_vaarg(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t rg0, rg1;
+    jit_word_t ge_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load begin/end gpr pointers */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, egpr));
+    movi(_R0, offsetof(jit_va_list_t, bgpr));
+    ldxr(rn(rg0), r1, _R0);
+
+    /* Check that we didn't reach the end gpr pointer. */
+    CMPHS(rn(rg0), rn(rg1));
+
+    /* Placeholder branch; its target is filled in by patch_at() below */
+    ge_code = _jit->pc.w;
+    BF(0);
+
+    /* If we did, load the stack pointer instead. */
+    movi(_R0, offsetof(jit_va_list_t, over));
+    ldxr(rn(rg0), r1, _R0);
+
+    patch_at(ge_code, _jit->pc.w);
+
+    /* All good, we can now load the actual value */
+    ldxai_i(r0, rn(rg0), sizeof(jit_uint32_t));
+
+    /* Update the pointer (gpr or stack) to the next word */
+    stxr(_R0, r1, rn(rg0));
+
+    jit_unget_reg(rg0);
+    jit_unget_reg(rg1);
+}
+
+/*
+ * Patch an absolute 32-bit value into an already emitted sequence at
+ * 'instr': the immediate fields of instructions 0, 2, 4 and 6 receive the
+ * four bytes of 'label', most significant byte first.
+ */
+static void
+_patch_abs(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+       jit_instr_t *ops = (jit_instr_t *)instr;
+       int idx, shift;
+
+       /* Every other instruction carries one byte of the value */
+       for (idx = 0, shift = 24; idx < 4; idx++, shift -= 8)
+               ops[idx * 2].ni.i = (label >> shift) & 0xff;
+}
+
+/*
+ * Patch the already emitted instruction at 'instr' so that it refers to
+ * 'label'.  The instruction kind is selected from its 'nmd.c' field:
+ *
+ *   0xe  absolute value spread over several instructions (patch_abs);
+ *   0xc  unsigned 8-bit displacement in 4-byte units from the aligned pc;
+ *   0xa  signed displacement in 2-byte units, range [-2048, 2046];
+ *   0x8  conditional branch (BT/BF and their delayed forms); rewritten
+ *        as an inverted branch over a BRA when the 8-bit displacement
+ *        does not reach;
+ *   0xd  mov.l: either the word it loads is overwritten with 'label', or
+ *        its displacement is set, depending on the current displacement.
+ *
+ * Any other opcode is a programming error and trips an assertion.
+ */
+static void
+_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+{
+       jit_instr_t *ptr = (jit_instr_t *)instr;
+       jit_int32_t disp;
+
+       switch (ptr->nmd.c) {
+       case 0xe:
+               patch_abs(instr, label);
+               break;
+       case 0xc:
+               disp = ((label - (instr & ~0x3)) >> 2) - 1;
+               assert(disp >= 0 && disp <= 255);
+               ptr->ni.i = disp;
+               break;
+       case 0xa:
+               disp = ((label - instr) >> 1) - 2;
+               assert(disp >= -2048 && disp <= 2046);
+               ptr->d.d = disp;
+               break;
+       case 0x8:
+               switch (ptr->ni.n) {
+               case 0x9:
+               case 0xb:
+               case 0xd:
+               case 0xf:
+                       disp = ((label - instr) >> 1) - 2;
+                       if (disp >= -128 && disp <= 127) {
+                               ptr->ni.i = disp;
+                       } else {
+                               /* Invert bit 1: BT(S) <-> BF(S) */
+                               ptr->ni.n ^= 1 << 1;
+
+                               /* Opcode 2 is now a BRA opcode */
+                               ptr[1].d = (struct jit_instr_d){ .c = 0xa, .d = disp - 1 };
+                       }
+                       break;
+               default:
+                       assert(!"unhandled branch opcode");
+               }
+               break;
+       case 0xd:
+               if (ptr->op & 0xff) {
+                       /* TODO: Fix the mess. patch_at() gets called with 'instr' pointing
+                        * to the mov.l opcode and 'label' being the value that should be
+                        * loaded into the register. So we read the address at which the mov.l
+                        * points to, and write the label there. */
+                       *(jit_uint32_t *)((instr & ~0x3) + 4 + (ptr->op & 0xff) * 4) = label;
+               } else {
+                       disp = ((label - instr) >> 2) - 1 + !!(instr & 0x3);
+                       ptr->op = (ptr->op & 0xff00) | disp;
+               }
+               break;
+       default:
+               /* A bare string literal is always true; negate it so the
+                * assertion actually fires, as in the inner switch above. */
+               assert(!"unhandled branch opcode");
+       }
+}
+
+/*
+ * Emit the function prologue for the function currently being compiled.
+ *
+ * Computes the frame layout (self.aoff, stack), reserves stack_framesize
+ * bytes, saves JIT_FP, the value read by STSPR, and every JIT_V register
+ * present in the function's regset, then sets JIT_FP and reserves the
+ * local stack area.  For varargs functions the remaining argument
+ * registers are also spilled.  Nothing is emitted when assume_frame is set.
+ *
+ * NOTE(review): 'offs' is declared but does not appear to be used here.
+ */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+       jit_uint16_t reg, regno, offs;
+
+       if (_jitc->function->define_frame || _jitc->function->assume_frame) {
+               jit_int32_t     frame = -_jitc->function->frame;
+               assert(_jitc->function->self.aoff >= frame);
+               if (_jitc->function->assume_frame)
+                       return;
+               _jitc->function->self.aoff = frame;
+       }
+
+       if (_jitc->function->allocar)
+               _jitc->function->self.aoff &= -8;
+       _jitc->function->stack = ((_jitc->function->self.alen -
+                                  /* align stack at 8 bytes */
+                                  _jitc->function->self.aoff) + 7) & -8;
+
+       /* Reserve the fixed frame and save the caller's frame pointer */
+       ADDI(JIT_SP, -stack_framesize);
+       STDL(JIT_SP, JIT_FP, JIT_V_NUM + 1);
+
+       /* Save the value read by STSPR (presumably the PR return address) */
+       STSPR(_R0);
+       STDL(JIT_SP, _R0, JIT_V_NUM);
+
+       /* Save only the JIT_V registers recorded in the regset */
+       for (regno = 0; regno < JIT_V_NUM; regno++)
+               if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(regno)))
+                       STDL(JIT_SP, JIT_V(regno), regno);
+
+       movr(JIT_FP, JIT_SP);
+
+       if (_jitc->function->stack)
+               subi(JIT_SP, JIT_SP, _jitc->function->stack);
+       if (_jitc->function->allocar) {
+               /* Record the current aoff in the frame slot at aoffoff */
+               reg = jit_get_reg(jit_class_gpr);
+               movi(rn(reg), _jitc->function->self.aoff);
+               stxi_i(_jitc->function->aoffoff, JIT_FP, rn(reg));
+               jit_unget_reg(reg);
+       }
+
+       if (_jitc->function->self.call & jit_call_varargs) {
+               /* Align to 8 bytes with +4 bytes offset (so that the double
+                * values are aligned to 8 bytes) */
+               andi(JIT_R0, JIT_FP, -8);
+               addi(JIT_R0, JIT_R0, 4);
+
+               /* Spill the word argument registers from vagp onwards */
+               for (regno = _jitc->function->vagp; jit_arg_reg_p(regno); regno++) {
+                       stxi(_jitc->function->vaoff
+                            + sizeof(jit_va_list_t)
+                            + regno * sizeof(jit_word_t),
+                            JIT_R0, rn(_R4 + regno));
+               }
+
+               /* Spill the float argument registers from vafp onwards */
+               for (regno = _jitc->function->vafp; jit_arg_f_reg_p(regno); regno++) {
+                       stxi_f(_jitc->function->vaoff
+                              + sizeof(jit_va_list_t)
+                              + NUM_WORD_ARGS * sizeof(jit_word_t)
+                              + regno * sizeof(jit_float32_t),
+                              JIT_R0, rn(_F4 + (regno ^ fpr_args_inverted())));
+               }
+       }
+
+       reset_fpu(_jit, 0);
+}
+
+/*
+ * Emit the function epilogue: restore JIT_SP from JIT_FP, reload the
+ * JIT_V registers recorded in the regset (highest index first), restore
+ * the value saved in slot JIT_V_NUM through LDSPR, reload JIT_FP and
+ * return.  Nothing is emitted when assume_frame is set.
+ */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+       unsigned int i;
+
+       if (_jitc->function->assume_frame)
+               return;
+
+       reset_fpu(_jit, 1);
+
+       movr(JIT_SP, JIT_FP);
+
+       /* Counts down from JIT_V_NUM; 'i - 1' is the register index */
+       for (i = JIT_V_NUM; i > 0; i--)
+               if (jit_regset_tstbit(&_jitc->function->regset, JIT_V(i - 1)))
+                       LDDL(JIT_V(i - 1), JIT_SP, i - 1);
+
+       /* JIT_FP is used as a scratch for the saved slot before being reloaded */
+       LDDL(JIT_FP, JIT_SP, JIT_V_NUM);
+       LDSPR(JIT_FP);
+
+       LDDL(JIT_FP, JIT_SP, JIT_V_NUM + 1);
+       RTS();
+       /* Emitted after RTS - presumably executes in its delay slot */
+       ADDI(JIT_SP, stack_framesize);
+}
+#endif /* CODE */
diff --git a/deps/lightning/lib/jit_sh-fpu.c b/deps/lightning/lib/jit_sh-fpu.c
new file mode 100644 (file)
index 0000000..e440a64
--- /dev/null
@@ -0,0 +1,2394 @@
+/*
+ * Copyright (C) 2022  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paul Cercueil
+ */
+
+#if PROTO
+/* FPU mode management helpers. */
+static void set_fmode(jit_state_t *_jit, jit_bool_t is_double);
+static void set_fmode_no_r0(jit_state_t *_jit, jit_bool_t is_double);
+static void reset_fpu(jit_state_t *_jit, jit_bool_t no_r0);
+
+/* _extr_f and _truncr_f_i serve both precisions: the trailing jit_bool_t
+ * is 0 for the _f wrapper and 1 for the _d wrapper. */
+static void _extr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define extr_f(r0,r1)                        _extr_f(_jit,r0,r1,0)
+#  define extr_d(r0,r1)                        _extr_f(_jit,r0,r1,1)
+static void _truncr_f_i(jit_state_t*,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define truncr_f_i(r0,r1)            _truncr_f_i(_jit,r0,r1,0)
+#  define truncr_d_i(r0,r1)            _truncr_f_i(_jit,r0,r1,1)
+/* fmar/fmsr/fnmar/fnmsr: four-register operations with one entry point
+ * per precision. */
+static void _fmar_f(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                   jit_uint16_t,jit_uint16_t);
+#  define fmar_f(r0, r1, r2, r3)       _fmar_f(_jit, r0, r1, r2, r3)
+static void _fmar_d(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                   jit_uint16_t,jit_uint16_t);
+#  define fmar_d(r0, r1, r2, r3)       _fmar_d(_jit, r0, r1, r2, r3)
+static void _fmsr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                   jit_uint16_t,jit_uint16_t);
+#  define fmsr_f(r0, r1, r2, r3)       _fmsr_f(_jit, r0, r1, r2, r3)
+static void _fmsr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                   jit_uint16_t,jit_uint16_t);
+#  define fmsr_d(r0, r1, r2, r3)       _fmsr_d(_jit, r0, r1, r2, r3)
+static void _fnmar_f(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_uint16_t);
+#  define fnmar_f(r0, r1, r2, r3)      _fnmar_f(_jit, r0, r1, r2, r3)
+static void _fnmar_d(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_uint16_t);
+#  define fnmar_d(r0, r1, r2, r3)      _fnmar_d(_jit, r0, r1, r2, r3)
+static void _fnmsr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_uint16_t);
+#  define fnmsr_f(r0, r1, r2, r3)      _fnmsr_f(_jit, r0, r1, r2, r3)
+static void _fnmsr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,
+                    jit_uint16_t,jit_uint16_t);
+#  define fnmsr_d(r0, r1, r2, r3)      _fnmsr_d(_jit, r0, r1, r2, r3)
+/* Register-to-register and immediate moves, one entry point per precision. */
+static void _movr_f(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define movr_f(r0,r1)                        _movr_f(_jit,r0,r1)
+static void _movr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
+static void _movi_f(jit_state_t*,jit_uint16_t,jit_float32_t);
+#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0)
+static void _movi_d(jit_state_t*,jit_uint16_t,jit_float64_t);
+#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)
+/*
+ * Comparison emitters.
+ *
+ * Register forms (xxr) share one function per predicate; its trailing
+ * jit_bool_t is 0 for the _f wrapper and 1 for the _d wrapper.  Immediate
+ * forms (xxi) have a separate function per precision.  ger/gtr/ungtr/unger
+ * have no function of their own: they are ler/ltr/unltr/unler with the two
+ * source operands swapped.
+ */
+static void _ltr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define ltr_f(r0,r1,r2)              _ltr_f(_jit,r0,r1,r2,0)
+#  define ltr_d(r0,r1,r2)              _ltr_f(_jit,r0,r1,r2,1)
+static void _lti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define lti_f(r0,r1,i0)              _lti_f(_jit,r0,r1,i0)
+static void _lti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define lti_d(r0,r1,i0)              _lti_d(_jit,r0,r1,i0)
+static void _ler_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define ler_f(r0,r1,r2)              _ler_f(_jit,r0,r1,r2,0)
+#  define ler_d(r0,r1,r2)              _ler_f(_jit,r0,r1,r2,1)
+static void _lei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define lei_f(r0,r1,i0)              _lei_f(_jit,r0,r1,i0)
+static void _lei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define lei_d(r0,r1,i0)              _lei_d(_jit,r0,r1,i0)
+static void _eqr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define eqr_f(r0,r1,r2)              _eqr_f(_jit,r0,r1,r2,0)
+#  define eqr_d(r0,r1,r2)              _eqr_f(_jit,r0,r1,r2,1)
+static void _eqi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define eqi_f(r0,r1,i0)              _eqi_f(_jit,r0,r1,i0)
+static void _eqi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define eqi_d(r0,r1,i0)              _eqi_d(_jit,r0,r1,i0)
+#  define ger_f(r0,r1,r2)              ler_f(r0,r2,r1)
+#  define ger_d(r0,r1,r2)              ler_d(r0,r2,r1)
+static void _gei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define gei_f(r0,r1,i0)              _gei_f(_jit,r0,r1,i0)
+static void _gei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define gei_d(r0,r1,i0)              _gei_d(_jit,r0,r1,i0)
+#  define gtr_f(r0,r1,r2)              ltr_f(r0,r2,r1)
+#  define gtr_d(r0,r1,r2)              ltr_d(r0,r2,r1)
+static void _gti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define gti_f(r0,r1,i0)              _gti_f(_jit,r0,r1,i0)
+static void _gti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define gti_d(r0,r1,i0)              _gti_d(_jit,r0,r1,i0)
+static void _ner_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define ner_f(r0,r1,r2)              _ner_f(_jit,r0,r1,r2,0)
+#  define ner_d(r0,r1,r2)              _ner_f(_jit,r0,r1,r2,1)
+static void _nei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define nei_f(r0,r1,i0)              _nei_f(_jit,r0,r1,i0)
+static void _nei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define nei_d(r0,r1,i0)              _nei_d(_jit,r0,r1,i0)
+static void _unltr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define unltr_f(r0,r1,r2)            _unltr_f(_jit,r0,r1,r2,0)
+#  define unltr_d(r0,r1,r2)            _unltr_f(_jit,r0,r1,r2,1)
+static void _unlti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define unlti_f(r0,r1,i0)            _unlti_f(_jit,r0,r1,i0)
+static void _unlti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define unlti_d(r0,r1,i0)            _unlti_d(_jit,r0,r1,i0)
+static void _unler_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,
+                    jit_bool_t);
+#  define unler_f(r0,r1,r2)            _unler_f(_jit,r0,r1,r2,0)
+#  define unler_d(r0,r1,r2)            _unler_f(_jit,r0,r1,r2,1)
+static void _unlei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define unlei_f(r0,r1,i0)            _unlei_f(_jit,r0,r1,i0)
+static void _unlei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define unlei_d(r0,r1,i0)            _unlei_d(_jit,r0,r1,i0)
+#  define ungtr_f(r0,r1,r2)            unltr_f(r0,r2,r1)
+#  define ungtr_d(r0,r1,r2)            unltr_d(r0,r2,r1)
+static void _ungti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define ungti_f(r0,r1,i0)            _ungti_f(_jit,r0,r1,i0)
+static void _ungti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define ungti_d(r0,r1,i0)            _ungti_d(_jit,r0,r1,i0)
+#  define unger_f(r0,r1,r2)            _unler_f(_jit,r0,r2,r1,0)
+#  define unger_d(r0,r1,r2)            _unler_f(_jit,r0,r2,r1,1)
+static void _ungei_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define ungei_f(r0,r1,i0)            _ungei_f(_jit,r0,r1,i0)
+static void _ungei_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define ungei_d(r0,r1,i0)            _ungei_d(_jit,r0,r1,i0)
+static void _uneqr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,
+                    jit_bool_t);
+#  define uneqr_f(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2,0)
+#  define uneqr_d(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2,1)
+static void _uneqi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
+static void _ltgtr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define ltgtr_f(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2,0)
+#  define ltgtr_d(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2,1)
+static void _ltgti_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
+static void _ordr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define ordr_f(r0,r1,r2)             _ordr_f(_jit,r0,r1,r2,0)
+#  define ordr_d(r0,r1,r2)             _ordr_f(_jit,r0,r1,r2,1)
+static void _ordi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define ordi_f(r0,r1,i0)             _ordi_f(_jit,r0,r1,i0)
+static void _ordi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define ordi_d(r0,r1,i0)             _ordi_d(_jit,r0,r1,i0)
+static void _unordr_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_int16_t,jit_bool_t);
+#  define unordr_f(r0,r1,r2)           _unordr_f(_jit,r0,r1,r2,0)
+#  define unordr_d(r0,r1,r2)           _unordr_f(_jit,r0,r1,r2,1)
+static void _unordi_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_float32_t);
+#  define unordi_f(r0,r1,i0)           _unordi_f(_jit,r0,r1,i0)
+static void _unordi_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_float64_t);
+#  define unordi_d(r0,r1,i0)           _unordi_d(_jit,r0,r1,i0)
+/*
+ * Arithmetic emitters.
+ *
+ * _addr_f serves both precisions through its trailing jit_bool_t (0 for
+ * addr_f, 1 for addr_d); the other operations have one function per
+ * precision.  rsbr_f/rsbr_d are subr_f/subr_d with the two source
+ * operands swapped.
+ */
+static void _addr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t,jit_bool_t);
+#  define addr_f(r0,r1,r2)             _addr_f(_jit,r0,r1,r2,0)
+#  define addr_d(r0,r1,r2)             _addr_f(_jit,r0,r1,r2,1)
+static void _addi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t);
+#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
+static void _subr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define subr_f(r0,r1,r2)             _subr_f(_jit,r0,r1,r2)
+static void _subr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define subr_d(r0,r1,r2)             _subr_d(_jit,r0,r1,r2)
+static void _subi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t);
+#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
+static void _negr_f(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define negr_f(r0,r1)                        _negr_f(_jit,r0,r1)
+static void _negr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define negr_d(r0,r1)                        _negr_d(_jit,r0,r1)
+#  define rsbr_f(r0,r1,r2)             subr_f(r0,r2,r1)
+static void _rsbi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t);
+#  define rsbi_f(r0,r1,i0)             _rsbi_f(_jit,r0,r1,i0)
+#  define rsbr_d(r0,r1,r2)             subr_d(r0,r2,r1)
+static void _rsbi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+#  define rsbi_d(r0,r1,i0)             _rsbi_d(_jit,r0,r1,i0)
+static void _mulr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define mulr_f(r0,r1,r2)             _mulr_f(_jit,r0,r1,r2)
+static void _muli_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t);
+#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
+static void _mulr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define mulr_d(r0,r1,r2)             _mulr_d(_jit,r0,r1,r2)
+static void _muli_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
+static void _divr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define divr_f(r0,r1,r2)             _divr_f(_jit,r0,r1,r2)
+static void _divi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float32_t);
+#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
+static void _divr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define divr_d(r0,r1,r2)             _divr_d(_jit,r0,r1,r2)
+static void _divi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_float64_t);
+#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
+/* Moves between word and floating-point operands: the _w_f/_f_w forms take
+ * one word operand, the _ww_d/_d_ww forms take two. */
+static void _movr_w_f(jit_state_t*,jit_uint16_t,jit_int16_t);
+#define movr_w_f(r0,r1)                        _movr_w_f(_jit,r0,r1)
+static void _movr_f_w(jit_state_t*,jit_uint16_t,jit_int16_t);
+#define movr_f_w(r0,r1)                        _movr_f_w(_jit,r0,r1)
+static void _movi_w_f(jit_state_t*,jit_int16_t,jit_word_t);
+# define movi_w_f(r0,i0)               _movi_w_f(_jit,r0,i0)
+static void _movr_ww_d(jit_state_t*,jit_uint16_t,jit_int16_t, jit_int16_t);
+# define movr_ww_d(r0,r1,r2)           _movr_ww_d(_jit,r0,r1,r2)
+static void _movr_d_ww(jit_state_t*,jit_uint16_t,jit_int16_t, jit_int16_t);
+# define movr_d_ww(r0,r1,r2)           _movr_d_ww(_jit,r0,r1,r2)
+static void _movi_ww_d(jit_state_t*,jit_int16_t,jit_word_t, jit_word_t);
+# define movi_ww_d(r0,i0,i1)           _movi_ww_d(_jit,r0,i0,i1)
+/* Two-register unary operations and precision conversions. */
+static void _absr_f(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define absr_f(r0,r1)                        _absr_f(_jit,r0,r1)
+static void _absr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define absr_d(r0,r1)                        _absr_d(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define sqrtr_f(r0,r1)               _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define sqrtr_d(r0,r1)               _sqrtr_d(_jit,r0,r1)
+static void _extr_d_f(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define extr_d_f(r0,r1)              _extr_d_f(_jit,r0,r1)
+static void _extr_f_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define extr_f_d(r0,r1)              _extr_f_d(_jit,r0,r1)
+/*
+ * Load and store emitters.
+ *
+ * ldr_f and str_f expand directly to the LDF/STF instruction macros; all
+ * other forms go through a dedicated function.  The unldr_x/unldi_x and
+ * unstr_x/unsti_x forms are routed to the generic fallback_* helpers.
+ */
+#  define ldr_f(r0,r1)                 LDF(r0,r1)
+static void _ldr_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define ldr_d(r0,r1)                 _ldr_d(_jit,r0,r1)
+static void _ldi_f(jit_state_t*,jit_uint16_t,jit_word_t);
+#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_uint16_t,jit_word_t);
+#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
+static void _ldxr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
+static void _ldxr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
+static void _ldxi_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_word_t);
+#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
+#  define unldr_x(r0,r1,i0)            fallback_unldr_x(r0,r1,i0)
+#  define unldi_x(r0,i0,i1)            fallback_unldi_x(r0,i0,i1)
+#  define str_f(r0,r1)                 STF(r0,r1)
+static void _str_d(jit_state_t*,jit_uint16_t,jit_uint16_t);
+#  define str_d(r0,r1)                 _str_d(_jit,r0,r1)
+static void _sti_f(jit_state_t*,jit_word_t,jit_uint16_t);
+#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_uint16_t);
+#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
+static void _stxr_f(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_uint16_t,jit_uint16_t,jit_uint16_t);
+#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t);
+#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
+#  define unstr_x(r0,r1,i0)            fallback_unstr_x(r0,r1,i0)
+#  define unsti_x(i0,r0,i1)            fallback_unsti_x(i0,r0,i1)
+/*
+ * Equality / inequality branches on two FP registers.
+ *
+ * _beqr_f serves all four variants.  Its three trailing flags are, in
+ * order: a precision selector (0 in the _f wrappers, 1 in the _d
+ * wrappers), a selector that is 1 for beq and 0 for bne, and the caller's
+ * 'p' flag.  The wrappers without a _p suffix pass p = 0.
+ *
+ * beqr_d/bner_d must go through the _d_p wrappers so that the precision
+ * selector is 1; routing them to the _f_p wrappers would emit the
+ * single-precision compare for double operands.
+ */
+static jit_word_t _beqr_f(jit_state_t*,jit_word_t,jit_uint16_t,jit_uint16_t,
+                         jit_bool_t,jit_bool_t,jit_bool_t);
+#  define beqr_f(i0,r0,r1)             beqr_f_p(i0,r0,r1,0)
+#  define bner_f(i0,r0,r1)             bner_f_p(i0,r0,r1,0)
+#  define beqr_d(i0,r0,r1)             beqr_d_p(i0,r0,r1,0)
+#  define bner_d(i0,r0,r1)             bner_d_p(i0,r0,r1,0)
+#  define beqr_f_p(i0,r0,r1,p)         _beqr_f(_jit,i0,r0,r1,0,1,p)
+#  define bner_f_p(i0,r0,r1,p)         _beqr_f(_jit,i0,r0,r1,0,0,p)
+#  define beqr_d_p(i0,r0,r1,p)         _beqr_f(_jit,i0,r0,r1,1,1,p)
+#  define bner_d_p(i0,r0,r1,p)         _beqr_f(_jit,i0,r0,r1,1,0,p)
+/*
+ * Immediate forms of the eq/ne and lt branches.
+ * _beqi_f()/_beqi_d() serve both beqi and bnei: the *_p macros pass 1 for
+ * beqi and 0 for bnei, followed by p.  The plain macros are the *_p macros
+ * with p = 0.
+ */
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_float32_t,jit_bool_t,jit_bool_t);
+#  define beqi_f(i0,r0,i1)             beqi_f_p(i0,r0,i1,0)
+#  define bnei_f(i0,r0,i1)             bnei_f_p(i0,r0,i1,0)
+#  define beqi_f_p(i0,r0,i1,p)         _beqi_f(_jit,i0,r0,i1,1,p)
+#  define bnei_f_p(i0,r0,i1,p)         _beqi_f(_jit,i0,r0,i1,0,p)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_uint16_t,
+                         jit_float64_t,jit_bool_t,jit_bool_t);
+#  define beqi_d(i0,r0,i1)             beqi_d_p(i0,r0,i1,0)
+#  define bnei_d(i0,r0,i1)             bnei_d_p(i0,r0,i1,0)
+#  define beqi_d_p(i0,r0,i1,p)         _beqi_d(_jit,i0,r0,i1,1,p)
+#  define bnei_d_p(i0,r0,i1,p)         _beqi_d(_jit,i0,r0,i1,0,p)
+static jit_word_t
+_blti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define blti_f(i0,r0,i1)             blti_f_p(i0,r0,i1,0)
+#  define blti_f_p(i0,r0,i1,p)         _blti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_blti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define blti_d(i0,r0,i1)             blti_d_p(i0,r0,i1,0)
+#  define blti_d_p(i0,r0,i1,p)         _blti_d(_jit,i0,r0,i1,p)
+/*
+ * Register/register greater-than and less-than branches.
+ *
+ * Both are built on _bgtr_f(); its trailing flags are passed as
+ * (0 for _f / 1 for _d, 1, p) here.  bltr_*_p obtain "r0 < r1" by handing
+ * the operands to _bgtr_f() in reverse order.
+ *
+ * The plain bltr_f/bltr_d macros must therefore forward their operands
+ * unchanged: swapping them here as well would cancel the swap done by
+ * bltr_*_p and make bltr behave exactly like bgtr.
+ */
+static jit_word_t _bgtr_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t,
+                         jit_bool_t,jit_bool_t,jit_bool_t);
+#  define bgtr_f(i0,r0,r1)             bgtr_f_p(i0,r0,r1,0)
+#  define bgtr_d(i0,r0,r1)             bgtr_d_p(i0,r0,r1,0)
+#  define bltr_f(i0,r0,r1)             bltr_f_p(i0,r0,r1,0)
+#  define bltr_d(i0,r0,r1)             bltr_d_p(i0,r0,r1,0)
+#  define bgtr_f_p(i0,r0,r1,p)         _bgtr_f(_jit,i0,r0,r1,0,1,p)
+#  define bgtr_d_p(i0,r0,r1,p)         _bgtr_f(_jit,i0,r0,r1,1,1,p)
+#  define bltr_f_p(i0,r0,r1,p)         _bgtr_f(_jit,i0,r1,r0,0,1,p)
+#  define bltr_d_p(i0,r0,r1,p)         _bgtr_f(_jit,i0,r1,r0,1,1,p)
+/*
+ * Immediate greater-than branches, register/register less-or-equal branches
+ * and immediate less-or-equal branches.
+ * bler_*_p call _bler_f() with trailing flags (0 for _f / 1 for _d, 0, p).
+ * Every plain macro is its *_p counterpart with p = 0.
+ */
+static jit_word_t
+_bgti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bgti_f(i0,r0,i1)             bgti_f_p(i0,r0,i1,0)
+#  define bgti_f_p(i0,r0,i1,p)         _bgti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bgti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bgti_d(i0,r0,i1)             bgti_d_p(i0,r0,i1,0)
+#  define bgti_d_p(i0,r0,i1,p)         _bgti_d(_jit,i0,r0,i1,p)
+static jit_word_t _bler_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t,
+                         jit_bool_t,jit_bool_t,jit_bool_t);
+#  define bler_f(i0,r0,r1)             bler_f_p(i0,r0,r1,0)
+#  define bler_d(i0,r0,r1)             bler_d_p(i0,r0,r1,0)
+#  define bler_f_p(i0,r0,r1,p)         _bler_f(_jit,i0,r0,r1,0,0,p)
+#  define bler_d_p(i0,r0,r1,p)         _bler_f(_jit,i0,r0,r1,1,0,p)
+static jit_word_t
+_blei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define blei_f(i0,r0,i1)             blei_f_p(i0,r0,i1,0)
+#  define blei_f_p(i0,r0,i1,p)         _blei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_blei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define blei_d(i0,r0,i1)             blei_d_p(i0,r0,i1,0)
+#  define blei_d_p(i0,r0,i1,p)         _blei_d(_jit,i0,r0,i1,p)
+/*
+ * Register/register greater-or-equal branches.
+ * bger_*_p express "r0 >= r1" as bler with the operands reversed, so the
+ * plain macros forward r0/r1 unchanged; reversing them here too would undo
+ * that swap and turn bger into bler.
+ */
+#  define bger_f(i0,r0,r1)             bger_f_p(i0,r0,r1,0)
+#  define bger_d(i0,r0,r1)             bger_d_p(i0,r0,r1,0)
+#  define bger_f_p(i0,r0,r1,p)         bler_f_p(i0,r1,r0,p)
+#  define bger_d_p(i0,r0,r1,p)         bler_d_p(i0,r1,r0,p)
+/*
+ * Immediate greater-or-equal branches; the plain macros are the *_p macros
+ * with p = 0, and the *_p macros forward to the prototyped helpers.
+ */
+static jit_word_t
+_bgei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bgei_f(i0,r0,i1)             bgei_f_p(i0,r0,i1,0)
+#  define bgei_f_p(i0,r0,i1,p)         _bgei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bgei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bgei_d(i0,r0,i1)             bgei_d_p(i0,r0,i1,0)
+#  define bgei_d_p(i0,r0,i1,p)         _bgei_d(_jit,i0,r0,i1,p)
+/*
+ * Register/register bunltr branches.
+ * bunltr_*_p call _bler_f() with the operands reversed and its second flag
+ * set to 1 (the same flag value bungtr_*_p use with operands in order), so
+ * the plain macros forward r0/r1 unchanged; a second swap here would make
+ * bunltr identical to bungtr.
+ */
+#  define bunltr_f(i0,r0,r1)           bunltr_f_p(i0,r0,r1,0)
+#  define bunltr_d(i0,r0,r1)           bunltr_d_p(i0,r0,r1,0)
+#  define bunltr_f_p(i0,r0,r1,p)       _bler_f(_jit,i0,r1,r0,0,1,p)
+#  define bunltr_d_p(i0,r0,r1,p)       _bler_f(_jit,i0,r1,r0,1,1,p)
+/*
+ * bunlti, bunler, bunlei, bungtr and bungti branch families.
+ * bunler_*_p reuse _bgtr_f() with its second flag set to 0, and bungtr_*_p
+ * reuse _bler_f() with its second flag set to 1; the immediate forms have
+ * dedicated helpers.  Every plain macro is its *_p counterpart with p = 0.
+ */
+static jit_word_t
+_bunlti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bunlti_f(i0,r0,i1)           bunlti_f_p(i0,r0,i1,0)
+#  define bunlti_f_p(i0,r0,i1,p)       _bunlti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bunlti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bunlti_d(i0,r0,i1)           bunlti_d_p(i0,r0,i1,0)
+#  define bunlti_d_p(i0,r0,i1,p)       _bunlti_d(_jit,i0,r0,i1,p)
+#  define bunler_f(i0,r0,r1)           bunler_f_p(i0,r0,r1,0)
+#  define bunler_d(i0,r0,r1)           bunler_d_p(i0,r0,r1,0)
+#  define bunler_f_p(i0,r0,r1,p)       _bgtr_f(_jit,i0,r0,r1,0,0,p)
+#  define bunler_d_p(i0,r0,r1,p)       _bgtr_f(_jit,i0,r0,r1,1,0,p)
+static jit_word_t
+_bunlei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bunlei_f(i0,r0,i1)           bunlei_f_p(i0,r0,i1,0)
+#  define bunlei_f_p(i0,r0,i1,p)       _bunlei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bunlei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bunlei_d(i0,r0,i1)           bunlei_d_p(i0,r0,i1,0)
+#  define bunlei_d_p(i0,r0,i1,p)       _bunlei_d(_jit,i0,r0,i1,p)
+#  define bungtr_f(i0,r0,r1)           bungtr_f_p(i0,r0,r1,0)
+#  define bungtr_d(i0,r0,r1)           bungtr_d_p(i0,r0,r1,0)
+#  define bungtr_f_p(i0,r0,r1,p)       _bler_f(_jit,i0,r0,r1,0,1,p)
+#  define bungtr_d_p(i0,r0,r1,p)       _bler_f(_jit,i0,r0,r1,1,1,p)
+static jit_word_t
+_bungti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bungti_f(i0,r0,i1)           bungti_f_p(i0,r0,i1,0)
+#  define bungti_f_p(i0,r0,i1,p)       _bungti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bungti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bungti_d(i0,r0,i1)           bungti_d_p(i0,r0,i1,0)
+#  define bungti_d_p(i0,r0,i1,p)       _bungti_d(_jit,i0,r0,i1,p)
+/*
+ * Register/register bunger branches.
+ * bunger_*_p call _bgtr_f() with the operands reversed and its second flag
+ * set to 0 (the same flag value bunler_*_p use with operands in order), so
+ * the plain macros forward r0/r1 unchanged; a second swap here would make
+ * bunger identical to bunler.
+ */
+#  define bunger_f(i0,r0,r1)           bunger_f_p(i0,r0,r1,0)
+#  define bunger_d(i0,r0,r1)           bunger_d_p(i0,r0,r1,0)
+#  define bunger_f_p(i0,r0,r1,p)       _bgtr_f(_jit,i0,r1,r0,0,0,p)
+#  define bunger_d_p(i0,r0,r1,p)       _bgtr_f(_jit,i0,r1,r0,1,0,p)
+/*
+ * bungei, buneq, bltgt, bord and bunord branch families.
+ * - _buneqr_f()/_bltgtr_f() take (0 for _f / 1 for _d, p) as trailing flags.
+ *   Their plain and *_p macros each reverse r0/r1, so the helper ends up
+ *   receiving the operands in the caller's order.
+ * - _bordr_f() serves both bordr (second flag 1) and bunordr (second flag 0).
+ * - Every plain macro is its *_p counterpart with p = 0.
+ */
+static jit_word_t
+_bungei_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bungei_f(i0,r0,i1)           bungei_f_p(i0,r0,i1,0)
+#  define bungei_f_p(i0,r0,i1,p)       _bungei_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bungei_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bungei_d(i0,r0,i1)           bungei_d_p(i0,r0,i1,0)
+#  define bungei_d_p(i0,r0,i1,p)       _bungei_d(_jit,i0,r0,i1,p)
+static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,jit_int16_t,
+                           jit_int16_t,jit_bool_t,jit_bool_t);
+#  define buneqr_f(i0,r0,r1)           buneqr_f_p(i0,r1,r0,0)
+#  define buneqr_d(i0,r0,r1)           buneqr_d_p(i0,r1,r0,0)
+#  define buneqr_f_p(i0,r0,r1,p)       _buneqr_f(_jit,i0,r1,r0,0,p)
+#  define buneqr_d_p(i0,r0,r1,p)       _buneqr_f(_jit,i0,r1,r0,1,p)
+static jit_word_t
+_buneqi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define buneqi_f(i0,r0,i1)           buneqi_f_p(i0,r0,i1,0)
+#  define buneqi_f_p(i0,r0,i1,p)       _buneqi_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_buneqi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define buneqi_d(i0,r0,i1)           buneqi_d_p(i0,r0,i1,0)
+#  define buneqi_d_p(i0,r0,i1,p)       _buneqi_d(_jit,i0,r0,i1,p)
+static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,jit_int16_t,
+                           jit_int16_t,jit_bool_t,jit_bool_t);
+#  define bltgtr_f(i0,r0,r1)           bltgtr_f_p(i0,r1,r0,0)
+#  define bltgtr_d(i0,r0,r1)           bltgtr_d_p(i0,r1,r0,0)
+#  define bltgtr_f_p(i0,r0,r1,p)       _bltgtr_f(_jit,i0,r1,r0,0,p)
+#  define bltgtr_d_p(i0,r0,r1,p)       _bltgtr_f(_jit,i0,r1,r0,1,p)
+static jit_word_t
+_bltgti_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bltgti_f(i0,r0,i1)           bltgti_f_p(i0,r0,i1,0)
+#  define bltgti_f_p(i0,r0,i1,p)       _bltgti_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bltgti_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bltgti_d(i0,r0,i1)           bltgti_d_p(i0,r0,i1,0)
+#  define bltgti_d_p(i0,r0,i1,p)       _bltgti_d(_jit,i0,r0,i1,p)
+static jit_word_t _bordr_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t,
+                          jit_bool_t,jit_bool_t,jit_bool_t);
+#  define bordr_f(i0,r0,r1)            bordr_f_p(i0,r0,r1,0)
+#  define bordr_d(i0,r0,r1)            bordr_d_p(i0,r0,r1,0)
+#  define bordr_f_p(i0,r0,r1,p)                _bordr_f(_jit,i0,r0,r1,0,1,p)
+#  define bordr_d_p(i0,r0,r1,p)                _bordr_f(_jit,i0,r0,r1,1,1,p)
+static jit_word_t
+_bordi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bordi_f(i0,r0,i1)            bordi_f_p(i0,r0,i1,0)
+#  define bordi_f_p(i0,r0,i1,p)                _bordi_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bordi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bordi_d(i0,r0,i1)            bordi_d_p(i0,r0,i1,0)
+#  define bordi_d_p(i0,r0,i1,p)                _bordi_d(_jit,i0,r0,i1,p)
+#  define bunordr_f(i0,r0,r1)          bunordr_f_p(i0,r0,r1,0)
+#  define bunordr_d(i0,r0,r1)          bunordr_d_p(i0,r0,r1,0)
+#  define bunordr_f_p(i0,r0,r1,p)      _bordr_f(_jit,i0,r0,r1,0,0,p)
+#  define bunordr_d_p(i0,r0,r1,p)      _bordr_f(_jit,i0,r0,r1,1,0,p)
+static jit_word_t
+_bunordi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_float32_t,jit_bool_t);
+#  define bunordi_f(i0,r0,i1)          bunordi_f_p(i0,r0,i1,0)
+#  define bunordi_f_p(i0,r0,i1,p)      _bunordi_f(_jit,i0,r0,i1,p)
+static jit_word_t
+_bunordi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_float64_t,jit_bool_t);
+#  define bunordi_d(i0,r0,i1)          bunordi_d_p(i0,r0,i1,0)
+#  define bunordi_d_p(i0,r0,i1,p)      _bunordi_d(_jit,i0,r0,i1,p)
+/*
+ * ldxbi/ldxai/stxbi/stxai float and double forms, plus vaarg_d.
+ * ldxbi_* and stxai_* forward to the generic_* implementations, while
+ * ldxai_* and stxbi_* have backend-specific helpers declared here.
+ */
+#  define ldxbi_f(r0,r1,i0)            generic_ldxbi_f(r0,r1,i0)
+#  define ldxbi_d(r0,r1,i0)            generic_ldxbi_d(r0,r1,i0)
+static void
+_ldxai_f(jit_state_t*,jit_int16_t,jit_int16_t,jit_word_t);
+#  define ldxai_f(r0,r1,i0)            _ldxai_f(_jit,r0,r1,i0)
+static void
+_ldxai_d(jit_state_t*,jit_int16_t,jit_int16_t,jit_word_t);
+#  define ldxai_d(r0,r1,i0)            _ldxai_d(_jit,r0,r1,i0)
+static void
+_stxbi_f(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t);
+#  define stxbi_f(i0,r0,r1)            _stxbi_f(_jit,i0,r0,r1)
+static void
+_stxbi_d(jit_state_t*,jit_word_t,jit_int16_t,jit_int16_t);
+#  define stxbi_d(i0,r0,r1)            _stxbi_d(_jit,i0,r0,r1)
+#  define stxai_f(i0,r0,r1)            generic_stxai_f(i0,r0,r1)
+#  define stxai_d(i0,r0,r1)            generic_stxai_d(i0,r0,r1)
+static void _vaarg_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
+#endif /* PROTO */
+
+#if CODE
+/*
+ * Emit code that XORs `mask` into the value read with STSFP() and writes
+ * the result back with LDSFP() (presumably the FPU status/control register
+ * -- confirm against the instruction macros).
+ *
+ * no_r0 != 0: two scratch GPRs are allocated and the full mask is applied.
+ * no_r0 == 0: _R0 is used directly; the word halves are swapped so that
+ *             XORI() can apply `mask >> 16`, then swapped back.
+ *
+ * Nothing is emitted unless the FPU exists and is in use.
+ */
+static void set_fmode_mask(jit_state_t *_jit, jit_uint32_t mask, jit_bool_t no_r0)
+{
+       jit_uint16_t value, bits;
+
+       if (!(SH_HAS_FPU && _jitc->uses_fpu))
+               return;
+
+       if (!no_r0) {
+               STSFP(_R0);
+               SWAPW(_R0, _R0);
+               XORI(mask >> 16);
+               SWAPW(_R0, _R0);
+               LDSFP(_R0);
+               return;
+       }
+
+       value = jit_get_reg(jit_class_gpr);
+       bits = jit_get_reg(jit_class_gpr);
+
+       movi(rn(bits), mask);
+       STSFP(rn(value));
+       xorr(rn(value), rn(value), rn(bits));
+       LDSFP(rn(value));
+
+       jit_unget_reg(value);
+       jit_unget_reg(bits);
+}
+
+/*
+ * Switch the tracked FPU precision mode to `is_double` when it differs from
+ * the current _jitc->mode_d, by toggling PR_FLAG (this variant passes
+ * no_r0 = 0 to set_fmode_mask()).  No-op without an FPU, in single-only
+ * builds, or when the FPU is unused.
+ */
+static void set_fmode(jit_state_t *_jit, jit_bool_t is_double)
+{
+       jit_bool_t must_switch;
+
+       must_switch = SH_HAS_FPU && !SH_SINGLE_ONLY && _jitc->uses_fpu &&
+                     _jitc->mode_d != is_double;
+       if (!must_switch)
+               return;
+
+       set_fmode_mask(_jit, PR_FLAG, 0);
+       _jitc->mode_d = is_double;
+}
+
+/*
+ * Bring the FPU state back to SH_DEFAULT_FPU_MODE and record that in
+ * _jitc->mode_d.
+ *  - tracked mode differs from the default: toggle PR_FLAG | FR_FLAG;
+ *  - otherwise, default mode non-zero:      toggle FR_FLAG only;
+ *  - otherwise:                             maybe_emit_frchg().
+ * `no_r0` is forwarded to set_fmode_mask().  No-op when the FPU is absent
+ * or unused.
+ */
+static void reset_fpu(jit_state_t *_jit, jit_bool_t no_r0)
+{
+       if (!(SH_HAS_FPU && _jitc->uses_fpu))
+               return;
+
+       if (_jitc->mode_d != SH_DEFAULT_FPU_MODE) {
+               set_fmode_mask(_jit, PR_FLAG | FR_FLAG, no_r0);
+       } else if (SH_DEFAULT_FPU_MODE) {
+               set_fmode_mask(_jit, FR_FLAG, no_r0);
+       } else {
+               maybe_emit_frchg();
+       }
+
+       _jitc->mode_d = SH_DEFAULT_FPU_MODE;
+}
+
+/*
+ * Same as set_fmode(), but passes no_r0 = 1 to set_fmode_mask() so the
+ * emitted sequence works on allocated scratch registers instead of _R0.
+ */
+static void set_fmode_no_r0(jit_state_t *_jit, jit_bool_t is_double)
+{
+       jit_bool_t must_switch;
+
+       must_switch = SH_HAS_FPU && _jitc->uses_fpu && !SH_SINGLE_ONLY &&
+                     _jitc->mode_d != is_double;
+       if (!must_switch)
+               return;
+
+       set_fmode_mask(_jit, PR_FLAG, 1);
+       _jitc->mode_d = is_double;
+}
+
+/*
+ * Emit the extr sequence: select the precision given by is_double, then
+ * LDS(r1) followed by FLOAT(r0).
+ * NOTE(review): presumably LDS moves GPR r1 into FPUL and FLOAT converts
+ * FPUL into FPR r0 -- confirm against the instruction macros.
+ */
+static void _extr_f(jit_state_t *_jit, jit_int16_t r0,
+                   jit_int16_t r1, jit_bool_t is_double)
+{
+       set_fmode(_jit, is_double);
+
+       LDS(r1);
+       FLOAT(r0);
+}
+
+/*
+ * Emit the truncr sequence: select the precision given by is_double, then
+ * FTRC(r1) followed by STSUL(r0).
+ * NOTE(review): presumably FTRC truncates FPR r1 into FPUL and STSUL copies
+ * FPUL into GPR r0 -- confirm against the instruction macros.
+ */
+static void _truncr_f_i(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+                       jit_bool_t is_double)
+{
+       set_fmode(_jit, is_double);
+
+       FTRC(r1);
+       STSUL(r0);
+}
+
+/*
+ * Single-precision multiply-add: r0 = r1 * r2 + r3.
+ *
+ * _F0 is requested by name; when the request yields JIT_NOREG the result is
+ * built from mulr_f() + addr_f() through an ordinary scratch FPR.
+ * Otherwise one factor is copied into the claimed register, r3 into r0, and
+ * FMAC() is emitted with the remaining factor.  When r0 aliases r2, r2 is
+ * the factor copied first, because the following move overwrites r0.
+ */
+static void _fmar_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                   jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint16_t tmp, saved, other;
+
+       set_fmode(_jit, 0);
+
+       tmp = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk);
+
+       if (tmp != JIT_NOREG) {
+               saved = (r0 == r2) ? r2 : r1;
+               other = (r0 == r2) ? r1 : r2;
+
+               movr_f(rn(tmp), saved);
+               movr_f(r0, r3);
+               FMAC(r0, other);
+       } else {
+               tmp = jit_get_reg(jit_class_fpr);
+               mulr_f(rn(tmp), r1, r2);
+               addr_f(r0, rn(tmp), r3);
+       }
+
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision multiply-add: r0 = r1 * r2 + r3, composed from mulr_d()
+ * and addr_d().  The product goes through a scratch FPR only when r0
+ * aliases r3, since writing it to r0 would destroy the addend.
+ */
+static void _fmar_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                   jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint16_t product;
+
+       if (r0 != r3) {
+               mulr_d(r0, r1, r2);
+               addr_d(r0, r0, r3);
+               return;
+       }
+
+       product = jit_get_reg(jit_class_fpr);
+       mulr_d(rn(product), r1, r2);
+       addr_d(r0, rn(product), r3);
+       jit_unget_reg(product);
+}
+
+/*
+ * Single-precision multiply-subtract: r0 = r1 * r2 - r3.
+ *
+ * Mirrors _fmar_f(): with _F0 claimed, one factor is copied into it, r3 is
+ * copied into r0 and negated, and FMAC() is emitted with the other factor.
+ * Without _F0 the result is built from mulr_f() + subr_f().
+ */
+static void _fmsr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                   jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint16_t tmp, saved, other;
+
+       set_fmode(_jit, 0);
+
+       tmp = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk);
+
+       if (tmp != JIT_NOREG) {
+               saved = (r0 == r2) ? r2 : r1;
+               other = (r0 == r2) ? r1 : r2;
+
+               movr_f(rn(tmp), saved);
+               movr_f(r0, r3);
+               FNEG(r0);
+               FMAC(r0, other);
+       } else {
+               tmp = jit_get_reg(jit_class_fpr);
+               mulr_f(rn(tmp), r1, r2);
+               subr_f(r0, rn(tmp), r3);
+       }
+
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision multiply-subtract: r0 = r1 * r2 - r3, composed from
+ * mulr_d() and subr_d().  A scratch FPR holds the product only when r0
+ * aliases r3.
+ */
+static void _fmsr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                   jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint16_t product;
+
+       if (r0 != r3) {
+               mulr_d(r0, r1, r2);
+               subr_d(r0, r0, r3);
+               return;
+       }
+
+       product = jit_get_reg(jit_class_fpr);
+       mulr_d(rn(product), r1, r2);
+       subr_d(r0, rn(product), r3);
+       jit_unget_reg(product);
+}
+
+/*
+ * Single-precision negated multiply-subtract: r0 = -(r1 * r2 - r3).
+ *
+ * Without _F0 this is fmsr_f() followed by negr_f().  With _F0 claimed, one
+ * factor is copied into it and negated, r3 is copied into r0, and FMAC() is
+ * emitted with the other factor.
+ */
+static void _fnmsr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                    jit_uint16_t r2, jit_uint16_t r3)
+{
+       jit_uint16_t tmp, saved, other;
+
+       set_fmode(_jit, 0);
+
+       tmp = jit_get_reg(_F0 | jit_class_fpr | jit_class_named | jit_class_chk);
+
+       if (tmp == JIT_NOREG) {
+               /* Nothing was allocated, so there is nothing to release. */
+               fmsr_f(r0, r1, r2, r3);
+               negr_f(r0, r0);
+               return;
+       }
+
+       saved = (r0 == r2) ? r2 : r1;
+       other = (r0 == r2) ? r1 : r2;
+
+       movr_f(rn(tmp), saved);
+       FNEG(rn(tmp));
+       movr_f(r0, r3);
+       FMAC(r0, other);
+
+       jit_unget_reg(tmp);
+}
+
+/* Double-precision fnmsr: fmsr_d() into r0, then negate r0 in place. */
+static void _fnmsr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                    jit_uint16_t r2, jit_uint16_t r3)
+{
+       fmsr_d(r0, r1, r2, r3);
+       negr_d(r0, r0);
+}
+
+/* Single-precision fnmar: fmar_f() into r0, then negate r0 in place. */
+static void _fnmar_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                    jit_uint16_t r2, jit_uint16_t r3)
+{
+       fmar_f(r0, r1, r2, r3);
+       negr_f(r0, r0);
+}
+
+/* Double-precision fnmar: fmar_d() into r0, then negate r0 in place. */
+static void _fnmar_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+                    jit_uint16_t r2, jit_uint16_t r3)
+{
+       fmar_d(r0, r1, r2, r3);
+       negr_d(r0, r0);
+}
+
+/*
+ * Single-precision register move r0 <- r1; nothing is emitted when both are
+ * the same register.
+ *
+ * Register numbers at or above _XF0 are rebased by _XF0 and accessed between
+ * FRCHG toggles (presumably they name the second FPU register bank --
+ * confirm).  Moves that cross that boundary are emitted as an FLDS/FSTS
+ * pair with a toggle in between.
+ */
+static void _movr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (r0 != r1) {
+               if (r0 >= _XF0 || r1 >= _XF0) {
+                       set_fmode(_jit, 0);
+
+                       if (r0 >= _XF0 && r1 >= _XF0) {
+                               /* Both operands are at or above _XF0. */
+                               maybe_emit_frchg();
+                               FMOV(r0 - _XF0, r1 - _XF0);
+                               FRCHG();
+                       } else if (r0 >= _XF0) {
+                               /* Only the destination is at or above _XF0. */
+                               FLDS(r1);
+                               FRCHG();
+                               FSTS(r0 - _XF0);
+                               FRCHG();
+                       } else {
+                               /* Only the source is at or above _XF0. */
+                               maybe_emit_frchg();
+                               FLDS(r1 - _XF0);
+                               FRCHG();
+                               FSTS(r0);
+                       }
+               } else {
+                       FMOV(r0, r1);
+               }
+       }
+}
+
+/*
+ * Double-precision register move r0 <- r1; nothing is emitted when both are
+ * the same register.
+ *
+ *  - SH_SINGLE_ONLY builds defer to movr_f().
+ *  - If either register is at or above _XF0, one of FMOVXX/FMOVXD/FMOVDX is
+ *    emitted between maybe_emit_fschg() and FSCHG(), with the _XF0-relative
+ *    register numbers.
+ *  - Otherwise the value is copied as two FMOVs, on (r0, r1) and
+ *    (r0 + 1, r1 + 1).
+ */
+static void _movr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (r0 != r1) {
+               if (SH_SINGLE_ONLY) {
+                       movr_f(r0, r1);
+               } else if (r0 >= _XF0 || r1 >= _XF0) {
+                       set_fmode(_jit, 0);
+                       maybe_emit_fschg();
+
+                       if (r0 >= _XF0 && r1 >= _XF0)
+                               FMOVXX(r0 - _XF0, r1 - _XF0);
+                       else if (r0 >= _XF0)
+                               FMOVXD(r0 - _XF0, r1);
+                       else
+                               FMOVDX(r0, r1 - _XF0);
+
+                       FSCHG();
+               } else {
+                       FMOV(r0, r1);
+                       FMOV(r0 + 1, r1 + 1);
+               }
+       }
+}
+
+/*
+ * Load the single-precision constant i0 into r0.
+ *
+ * When r0 is at or above _XF0 the register number is rebased by _XF0 and
+ * the load is bracketed by maybe_emit_frchg() / FRCHG().
+ *
+ * +0.0, -0.0, +1.0 and -1.0 are synthesized with FLDI0/FLDI1 (plus FNEG for
+ * the negative values); every other constant goes through load_const_f().
+ *
+ * The two zeros are distinguished by bit pattern rather than by value:
+ * `i0 == 0.0f` is also true for -0.0f, which made the negative-zero branch
+ * unreachable and caused -0.0f to be loaded as +0.0f.
+ */
+static void _movi_f(jit_state_t *_jit, jit_uint16_t r0, jit_float32_t i0)
+{
+       union {
+               jit_float32_t f;
+               jit_uint32_t w;
+       } bits;
+       jit_bool_t is_bank = r0 >= _XF0;
+
+       bits.f = i0;
+
+       set_fmode(_jit, 0);
+
+       if (is_bank) {
+               maybe_emit_frchg();
+               r0 -= _XF0;
+       }
+
+       if (bits.w == 0) {
+               /* +0.0: all bits clear. */
+               FLDI0(r0);
+       } else if (bits.w == 0x80000000) {
+               /* -0.0: only the sign bit set. */
+               FLDI0(r0);
+               FNEG(r0);
+       } else if (i0 == 1.0f) {
+               FLDI1(r0);
+       } else if (i0 == -1.0f) {
+               FLDI1(r0);
+               FNEG(r0);
+       } else {
+               load_const_f(0, r0, i0);
+       }
+
+       if (is_bank)
+               FRCHG();
+}
+
+/*
+ * Load the double-precision constant i0 into the register pair starting at
+ * r0, one 32-bit word at a time through movi_w_f().
+ *
+ *  - SH_SINGLE_ONLY builds narrow the constant and defer to movi_f().
+ *  - Registers at or above _XF0 are rebased by _XF0 and written between
+ *    maybe_emit_frchg() and FRCHG().
+ *
+ * NOTE(review): the union's first 32-bit member is named `hi` and is
+ * written to r0 + 1, the second (`lo`) to r0.  Which half of the double
+ * each member actually holds depends on host byte order -- confirm this is
+ * correct for every supported endianness.
+ */
+static void _movi_d(jit_state_t *_jit, jit_uint16_t r0, jit_float64_t i0)
+{
+       /* Overlay used to split the double into two 32-bit words. */
+       union fl64 {
+               struct {
+                       jit_uint32_t hi;
+                       jit_uint32_t lo;
+               };
+               jit_float64_t f;
+       };
+
+       if (SH_SINGLE_ONLY) {
+               movi_f(r0, (jit_float32_t)i0);
+       } else if (r0 >= _XF0) {
+               set_fmode(_jit, 0);
+               maybe_emit_frchg();
+
+               movi_w_f(r0 + 1 - _XF0, ((union fl64)i0).hi);
+               movi_w_f(r0 - _XF0, ((union fl64)i0).lo);
+
+               FRCHG();
+       } else {
+               movi_w_f(r0 + 1, ((union fl64)i0).hi);
+               movi_w_f(r0, ((union fl64)i0).lo);
+       }
+}
+
+/*
+ * ltr for float/double: select the precision given by is_double, emit
+ * FCMPGT(r2, r1) and copy the resulting T bit into GPR r0 with MOVT().
+ */
+static void _ltr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+                  jit_int16_t r2, jit_bool_t is_double)
+{
+       set_fmode(_jit, is_double);
+
+       FCMPGT(r2, r1);
+       MOVT(r0);
+}
+
+/* Immediate form of ltr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_lti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm = jit_get_reg(jit_class_fpr);
+
+       movi_f(rn(imm), i0);
+       ltr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ltr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_lti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm = jit_get_reg(jit_class_fpr);
+
+       movi_d(rn(imm), i0);
+       ltr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * ler for float/double: computes the comparison result in _R0 through a
+ * hand-laid-out instruction sequence and finally copies _R0 into r0.
+ *
+ * A scratch FPR is allocated and later filled either from _R0 via
+ * movr_w_f() (double) or with FLDI0() (single).
+ *
+ * NOTE(review): the BF/BT/BRA operands look like hand-counted relative
+ * displacements; two of them include is_double, presumably because the
+ * double-only movr_w_f() path is one instruction longer than FLDI0().
+ * Any change to the sequence must re-derive these offsets.
+ */
+static void _ler_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+                  jit_int16_t r2, jit_bool_t is_double)
+{
+       jit_uint16_t reg;
+
+       reg = jit_get_reg(jit_class_fpr);
+
+       set_fmode(_jit, is_double);
+
+       /* Start with a zero result; compare each operand against itself. */
+       MOVI(_R0, 0);
+       FCMPEQ(r1, r1);
+       BF(5);
+       FCMPEQ(r2, r2);
+       BF(3);
+
+       FCMPGT(r1, r2);
+       MOVT(_R0);
+       BRA(13 + is_double);
+       XORI(1);
+
+       /* Scratch FPR: taken from _R0 for doubles, FLDI0 for singles. */
+       if (is_double)
+               movr_w_f(rn(reg), _R0);
+       else
+               FLDI0(rn(reg));
+       /* Compare r1 against the scratch register in both directions. */
+       FCMPGT(rn(reg), r1);
+       MOVT(_R0);
+       FCMPGT(r1, rn(reg));
+       ROTL(_R0);
+       TST(_R0, _R0);
+       BT(5);
+
+       /* Same two-way comparison for r2. */
+       FCMPGT(rn(reg), r2);
+       MOVT(_R0);
+       FCMPGT(r2, rn(reg));
+       ROTL(_R0);
+       TST(_R0, _R0);
+       BF(-18 - is_double);
+
+       movr(r0, _R0);
+
+       jit_unget_reg(reg);
+}
+
+/* Immediate form of ler_f(): i0 is loaded into a scratch FPR first. */
+static void
+_lei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       ler_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ler_d(): i0 is loaded into a scratch FPR first. */
+static void
+_lei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       ler_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * eqr for float/double: select the precision given by is_double, emit
+ * FCMPEQ(r1, r2) and copy the resulting T bit into GPR r0 with MOVT().
+ */
+static void _eqr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1,
+                  jit_int16_t r2, jit_bool_t is_double)
+{
+       set_fmode(_jit, is_double);
+
+       FCMPEQ(r1, r2);
+       MOVT(r0);
+}
+
+/* Immediate form of eqr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_eqi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       eqr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of eqr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_eqi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       eqr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ger_f(): i0 is loaded into a scratch FPR first. */
+static void
+_gei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       ger_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ger_d(): i0 is loaded into a scratch FPR first. */
+static void
+_gei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       ger_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of gtr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_gti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       gtr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of gtr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_gti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       gtr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * ner for float/double: evaluate _eqr_f() into _R0, invert the low bit with
+ * XORI(1) (which operates on _R0), then copy _R0 into r0.
+ */
+static void
+_ner_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+       jit_bool_t is_double)
+{
+       _eqr_f(_jit, _R0, r1, r2, is_double);
+       XORI(1);
+       movr(r0, _R0);
+}
+
+/* Immediate form of ner_f(): i0 is loaded into a scratch FPR first. */
+static void
+_nei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       ner_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ner_d(): i0 is loaded into a scratch FPR first. */
+static void
+_nei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       ner_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * unltr for float/double, built as the inverse of ler with the operands
+ * reversed: _ler_f(r2, r1) is evaluated into _R0, its low bit is flipped
+ * with XORI(1), and _R0 is copied into r0.
+ */
+static void
+_unltr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+        jit_bool_t is_double)
+{
+       _ler_f(_jit, _R0, r2, r1, is_double);
+       XORI(1);
+       movr(r0, _R0);
+}
+
+/* Immediate form of unltr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_unlti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       unltr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of unltr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_unlti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       unltr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * unler for float/double, built as the inverse of ltr with the operands
+ * reversed: _ltr_f(r2, r1) is evaluated into _R0, its low bit is flipped
+ * with XORI(1), and _R0 is copied into r0.
+ */
+static void
+_unler_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+        jit_bool_t is_double)
+{
+       _ltr_f(_jit, _R0, r2, r1, is_double);
+       XORI(1);
+       movr(r0, _R0);
+}
+
+/* Immediate form of unler_f(): i0 is loaded into a scratch FPR first. */
+static void
+_unlei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       unler_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of unler_d(): i0 is loaded into a scratch FPR first. */
+static void
+_unlei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       unler_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ungtr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_ungti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       ungtr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ungtr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_ungti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       ungtr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of unger_f(): i0 is loaded into a scratch FPR first. */
+static void
+_ungei_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       unger_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of unger_d(): i0 is loaded into a scratch FPR first. */
+static void
+_ungei_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       unger_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * uneqr for float/double: the AND of unler evaluated in both operand
+ * orders.  The (r2, r1) result is parked in a scratch GPR while the
+ * (r1, r2) result is computed straight into r0.
+ */
+static void
+_uneqr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+        jit_bool_t is_double)
+{
+       jit_uint16_t swapped;
+
+       swapped = jit_get_reg(jit_class_gpr);
+       _unler_f(_jit, rn(swapped), r2, r1, is_double);
+       _unler_f(_jit, r0, r1, r2, is_double);
+       andr(r0, r0, rn(swapped));
+       jit_unget_reg(swapped);
+}
+
+/* Immediate form of uneqr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_uneqi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       uneqr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of uneqr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_uneqi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       uneqr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * ltgtr for float/double: the inverse of uneqr -- _uneqr_f() is evaluated
+ * into r0 and its low bit is then flipped with xori.
+ */
+static void
+_ltgtr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+        jit_bool_t is_double)
+{
+       _uneqr_f(_jit, r0, r1, r2, is_double);
+       xori(r0, r0, 1);
+}
+
+/* Immediate form of ltgtr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_ltgti_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       ltgtr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ltgtr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_ltgti_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       ltgtr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * ordr for float/double: each operand is compared for equality with itself
+ * and the two results are ANDed.  The r1 result is held in a scratch GPR
+ * while the r2 result is computed into r0.
+ */
+static void
+_ordr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+       jit_bool_t is_double)
+{
+       jit_uint16_t first;
+
+       first = jit_get_reg(jit_class_gpr);
+       _eqr_f(_jit, rn(first), r1, r1, is_double);
+       _eqr_f(_jit, r0, r2, r2, is_double);
+       andr(r0, r0, rn(first));
+       jit_unget_reg(first);
+}
+
+/* Immediate form of ordr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_ordi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       ordr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of ordr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_ordi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       ordr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * unordr for float/double: each operand is tested for inequality with
+ * itself and the two results are ORed.  The r1 result is held in a scratch
+ * GPR while the r2 result is computed into r0.
+ */
+static void
+_unordr_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_int16_t r2,
+         jit_bool_t is_double)
+{
+       jit_uint16_t first;
+
+       first = jit_get_reg(jit_class_gpr);
+       _ner_f(_jit, rn(first), r1, r1, is_double);
+       _ner_f(_jit, r0, r2, r2, is_double);
+       orr(r0, r0, rn(first));
+       jit_unget_reg(first);
+}
+
+/* Immediate form of unordr_f(): i0 is loaded into a scratch FPR first. */
+static void
+_unordi_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       unordr_f(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/* Immediate form of unordr_d(): i0 is loaded into a scratch FPR first. */
+static void
+_unordi_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       unordr_d(r0, r1, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * Float/double addition r0 = r1 + r2 on top of the two-operand FADD().
+ * When r0 already holds r2, r1 is simply added to it; otherwise r1 is first
+ * moved into r0 (with the move matching is_double) and r2 is added.
+ */
+static void
+_addr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1,
+       jit_uint16_t r2, jit_bool_t is_double)
+{
+       jit_uint16_t addend = r1;
+
+       set_fmode(_jit, is_double);
+
+       if (r0 != r2) {
+               if (is_double)
+                       movr_d(r0, r1);
+               else
+                       movr_f(r0, r1);
+               addend = r2;
+       }
+
+       FADD(r0, addend);
+}
+
+/*
+ * Single-precision r0 = r1 + i0.  If r0 and r1 differ, the constant is
+ * loaded directly into r0 and r1 is added; otherwise the constant goes
+ * through a scratch FPR so that r1 is not overwritten.
+ */
+static void
+_addi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t imm;
+
+       set_fmode(_jit, 0);
+
+       if (r0 != r1) {
+               movi_f(r0, i0);
+               FADD(r0, r1);
+               return;
+       }
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_f(rn(imm), i0);
+       FADD(r0, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * Double-precision r0 = r1 + i0.  If r0 and r1 differ, the constant is
+ * loaded directly into r0 and r1 is added; otherwise the constant goes
+ * through a scratch FPR so that r1 is not overwritten.
+ */
+static void _addi_d(jit_state_t *_jit, jit_uint16_t r0,
+                   jit_uint16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t imm;
+
+       set_fmode(_jit, 1);
+
+       if (r0 != r1) {
+               movi_d(r0, i0);
+               FADD(r0, r1);
+               return;
+       }
+
+       imm = jit_get_reg(jit_class_fpr);
+       movi_d(rn(imm), i0);
+       FADD(r0, rn(imm));
+       jit_unget_reg(imm);
+}
+
+/*
+ * Single-precision r0 = r1 - r2.
+ *
+ *  - r1 == r2: the result is loaded as the constant 0.0f.
+ *  - r0 == r2: r0 is negated in place and r1 is added to it.
+ *  - otherwise: r1 is moved into r0 and r2 is subtracted.
+ *
+ * The previously declared but never used scratch-register local has been
+ * removed.
+ *
+ * NOTE(review): the r1 == r2 shortcut yields 0.0 even when the register
+ * holds NaN or an infinity, where x - x would be NaN -- confirm this is
+ * acceptable for callers.
+ */
+static void
+_subr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       set_fmode(_jit, 0);
+
+       if (r1 == r2) {
+               movi_f(r0, 0.0f);
+       } else if (r0 == r2) {
+               FNEG(r0);
+               FADD(r0, r1);
+       } else {
+               movr_f(r0, r1);
+               FSUB(r0, r2);
+       }
+}
+
+/*
+ * Double-precision subtraction r0 <- r1 - r2.
+ *
+ * Three operand layouts are handled:
+ *   - r1 == r2: the constant 0.0 is loaded into r0;
+ *   - r0 == r2: r0 is negated in place and r1 is added to it;
+ *   - otherwise: r1 is copied to r0 and r2 is subtracted from it.
+ *
+ * NOTE(review): the r1 == r2 shortcut produces 0.0 even when the operand
+ * is NaN or infinite, where a real subtraction would not -- confirm this
+ * is acceptable for callers.
+ */
+static void
+_subr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       set_fmode(_jit, 1);
+
+       if (r1 == r2) {
+               movi_d(r0, 0.0);
+       } else if (r0 == r2) {
+               FNEG(r0);
+               FADD(r0, r1);
+       } else {
+               movr_d(r0, r1);
+               FSUB(r0, r2);
+       }
+}
+
+/*
+ * Single-precision subtraction of the immediate i0 from r1, into r0.
+ * Without aliasing, -i0 is loaded into r0 and r1 is added to it; when r0
+ * aliases r1, i0 goes through a temporary FPR and is subtracted from r0.
+ */
+static void
+_subi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t tmp;
+
+       set_fmode(_jit, 0);
+
+       if (r0 != r1) {
+               movi_f(r0, -i0);
+               FADD(r0, r1);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_f(rn(tmp), i0);
+       FSUB(r0, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision subtraction of the immediate i0 from r1, into r0.
+ * Without aliasing, -i0 is loaded into r0 and r1 is added to it; when r0
+ * aliases r1, i0 goes through a temporary FPR and is subtracted from r0.
+ */
+static void
+_subi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t tmp;
+
+       set_fmode(_jit, 1);
+
+       if (r0 != r1) {
+               movi_d(r0, -i0);
+               FADD(r0, r1);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_d(rn(tmp), i0);
+       FSUB(r0, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision reverse subtraction: the immediate i0 minus r1, into
+ * r0.  Without aliasing, i0 is loaded into r0 and r1 subtracted from it;
+ * when r0 aliases r1, i0 goes through a temporary FPR and subr_f is used.
+ */
+static void
+_rsbi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t tmp;
+
+       set_fmode(_jit, 0);
+
+       if (r0 != r1) {
+               movi_f(r0, i0);
+               FSUB(r0, r1);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_f(rn(tmp), i0);
+       subr_f(r0, rn(tmp), r0);
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision reverse subtraction: the immediate i0 minus r1, into
+ * r0.  Without aliasing, i0 is loaded into r0 and r1 subtracted from it;
+ * when r0 aliases r1, i0 goes through a temporary FPR and subr_d is used.
+ */
+static void
+_rsbi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t tmp;
+
+       set_fmode(_jit, 1);
+
+       if (r0 != r1) {
+               movi_d(r0, i0);
+               FSUB(r0, r1);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_d(rn(tmp), i0);
+       subr_d(r0, rn(tmp), r0);
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision multiplication of r1 and r2 into r0.  FMUL is used in
+ * two-operand form, so unless r0 already aliases r2, r1 is copied into r0
+ * first.
+ */
+static void
+_mulr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       set_fmode(_jit, 0);
+
+       if (r0 != r2) {
+               movr_f(r0, r1);
+               FMUL(r0, r2);
+               return;
+       }
+
+       /* r0 already holds the r2 operand. */
+       FMUL(r0, r1);
+}
+
+/*
+ * Single-precision multiplication of r1 by the immediate i0, into r0,
+ * delegated to mulr_f.  When r0 aliases r1 the immediate goes through a
+ * temporary FPR; otherwise it is loaded into r0 directly.
+ */
+static void
+_muli_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t tmp;
+
+       if (r0 != r1) {
+               movi_f(r0, i0);
+               mulr_f(r0, r0, r1);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_f(rn(tmp), i0);
+       mulr_f(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision multiplication of r1 and r2 into r0.  FMUL is used in
+ * two-operand form, so unless r0 already aliases r2, r1 is copied into r0
+ * first.
+ */
+static void
+_mulr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       set_fmode(_jit, 1);
+
+       if (r0 != r2) {
+               movr_d(r0, r1);
+               FMUL(r0, r2);
+               return;
+       }
+
+       /* r0 already holds the r2 operand. */
+       FMUL(r0, r1);
+}
+
+/*
+ * Double-precision multiplication of r1 by the immediate i0, into r0,
+ * delegated to mulr_d.  When r0 aliases r1 the immediate goes through a
+ * temporary FPR; otherwise it is loaded into r0 directly.
+ */
+static void
+_muli_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t tmp;
+
+       if (r0 != r1) {
+               movi_d(r0, i0);
+               mulr_d(r0, r0, r1);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_d(rn(tmp), i0);
+       mulr_d(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Single-precision division of r1 by r2, into r0.  The dividend is copied
+ * into r0 before FDIV; when r0 aliases r2 the divisor is saved in a
+ * temporary FPR first so that the copy does not overwrite it.
+ */
+static void
+_divr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint16_t tmp;
+
+       set_fmode(_jit, 0);
+
+       if (r0 != r2) {
+               movr_f(r0, r1);
+               FDIV(r0, r2);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movr_f(rn(tmp), r2);
+       movr_f(r0, r1);
+       FDIV(r0, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of divr_f: i0 is loaded into a temporary FPR that is
+ * then used as the divisor operand.
+ */
+static void
+_divi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float32_t i0)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+
+       movi_f(rn(tmp), i0);
+       divr_f(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Double-precision division of r1 by r2, into r0.  The dividend is copied
+ * into r0 before FDIV; when r0 aliases r2 the divisor is saved in a
+ * temporary FPR first so that the copy does not overwrite it.
+ */
+static void
+_divr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       jit_uint16_t tmp;
+
+       set_fmode(_jit, 1);
+
+       if (r0 != r2) {
+               movr_d(r0, r1);
+               FDIV(r0, r2);
+               return;
+       }
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movr_d(rn(tmp), r2);
+       movr_d(r0, r1);
+       FDIV(r0, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/*
+ * Immediate form of divr_d: i0 is loaded into a temporary FPR that is
+ * then used as the divisor operand.
+ */
+static void
+_divi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_float64_t i0)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+
+       movi_d(rn(tmp), i0);
+       divr_d(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+}
+
+/* Single-precision absolute value: copy r1 into r0, then FABS in place. */
+static void
+_absr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       set_fmode(_jit, 0);
+       movr_f(r0, r1);
+       FABS(r0);
+}
+
+/* Double-precision absolute value: copy r1 into r0, then FABS in place. */
+static void
+_absr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       set_fmode(_jit, 1);
+       movr_d(r0, r1);
+       FABS(r0);
+}
+
+/* Single-precision square root: copy r1 into r0, then FSQRT in place. */
+static void
+_sqrtr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       set_fmode(_jit, 0);
+       movr_f(r0, r1);
+       FSQRT(r0);
+}
+
+/* Double-precision square root: copy r1 into r0, then FSQRT in place. */
+static void
+_sqrtr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       set_fmode(_jit, 1);
+       movr_d(r0, r1);
+       FSQRT(r0);
+}
+
+/* Single-precision negation: copy r1 into r0, then FNEG in place. */
+static void
+_negr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       set_fmode(_jit, 0);
+       movr_f(r0, r1);
+       FNEG(r0);
+}
+
+/* Double-precision negation: copy r1 into r0, then FNEG in place. */
+static void
+_negr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       set_fmode(_jit, 1);
+       movr_d(r0, r1);
+       FNEG(r0);
+}
+
+/*
+ * Convert the double in r1 to a float in r0.  With SH_SINGLE_ONLY this is
+ * a plain float move.  Otherwise FCNVDS is issued on r1 in double mode and
+ * FSTS on r0 in single mode (presumably passing the value through FPUL --
+ * confirm against the opcode definitions).
+ */
+static void
+_extr_d_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (SH_SINGLE_ONLY) {
+               movr_f(r0, r1);
+               return;
+       }
+
+       set_fmode(_jit, 1);
+       FCNVDS(r1);
+       set_fmode(_jit, 0);
+       FSTS(r0);
+}
+
+/*
+ * Convert the float in r1 to a double in r0.  With SH_SINGLE_ONLY this is
+ * a plain float move.  Otherwise FLDS is issued on r1 in single mode and
+ * FCNVSD on r0 in double mode (presumably passing the value through FPUL
+ * -- confirm against the opcode definitions).
+ */
+static void
+_extr_f_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (SH_SINGLE_ONLY) {
+               movr_f(r0, r1);
+               return;
+       }
+
+       set_fmode(_jit, 0);
+       FLDS(r1);
+       set_fmode(_jit, 1);
+       FCNVSD(r0);
+}
+
+/*
+ * Load a double from the address in r1 into the register pair r0/r0+1.
+ * With SH_SINGLE_ONLY this falls back to ldr_f.  Otherwise the address is
+ * copied to _R0 and two word loads are issued: first into r0 + 1 via LDFS
+ * (presumably post-incrementing _R0 -- confirm), then into r0 via LDF.
+ */
+static void
+_ldr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (SH_SINGLE_ONLY) {
+               ldr_f(r0, r1);
+               return;
+       }
+
+       movr(_R0, r1);
+       LDFS(r0 + 1, _R0);
+       LDF(r0, _R0);
+}
+
+/* Load a float from the immediate address i0: _R0 <- i0, then ldr_f. */
+static void
+_ldi_f(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_f(r0, _R0);
+}
+
+/* Load a double from the immediate address i0: _R0 <- i0, then ldr_d. */
+static void
+_ldi_d(jit_state_t *_jit, jit_uint16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldr_d(r0, _R0);
+}
+
+/*
+ * Indexed float load into r0 from r1 and r2.  r2 is copied to _R0 before
+ * LDXF(r0, r1); LDXF presumably uses _R0 as implicit index -- confirm
+ * against the opcode definition.
+ */
+static void
+_ldxr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       movr(_R0, r2);
+       LDXF(r0, r1);
+}
+
+/*
+ * Indexed double load into r0 from r1 and r2.  With SH_SINGLE_ONLY this
+ * falls back to ldxr_f; otherwise r1 + r2 is computed into _R0 and the
+ * load is done by ldr_d.
+ */
+static void
+_ldxr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (SH_SINGLE_ONLY) {
+               ldxr_f(r0, r1, r2);
+               return;
+       }
+
+       addr(_R0, r1, r2);
+       ldr_d(r0, _R0);
+}
+
+/* Immediate-offset form of ldxr_f: the offset i0 is placed in _R0. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldxr_f(r0, r1, _R0);
+}
+
+/* Immediate-offset form of ldxr_d: the offset i0 is placed in _R0. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_word_t i0)
+{
+       movi(_R0, i0);
+       ldxr_d(r0, r1, _R0);
+}
+
+/*
+ * Store the double held in the register pair r1/r1+1 at the address in
+ * r0.  With SH_SINGLE_ONLY this falls back to str_f.  Otherwise r1 + 1 is
+ * stored with STF at r0, then _R0 is set to 4 and r1 is stored with STXF
+ * (presumably at r0 + _R0 -- confirm against the opcode definition).
+ */
+static void
+_str_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1)
+{
+       if (SH_SINGLE_ONLY) {
+               str_f(r0, r1);
+               return;
+       }
+
+       STF(r0, r1 + 1);
+       movi(_R0, 4);
+       STXF(r0, r1);
+}
+
+/* Store the float r0 at the immediate address i0: _R0 <- i0, then STF. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+       movi(_R0, i0);
+       STF(_R0, r0);
+}
+
+/*
+ * Store the double held in r0/r0+1 at the immediate address i0.  With
+ * SH_SINGLE_ONLY this falls back to sti_f.  Otherwise _R0 is set to
+ * i0 + 8 and the two halves are stored with STFS, r0 first and r0 + 1
+ * second (STFS presumably pre-decrements _R0 by 4 each time -- confirm).
+ */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0)
+{
+       if (SH_SINGLE_ONLY) {
+               sti_f(i0, r0);
+               return;
+       }
+
+       movi(_R0, i0 + 8);
+       STFS(_R0, r0);
+       STFS(_R0, r0 + 1);
+}
+
+/*
+ * Indexed float store of r2 using r0 and r1 as address operands.  r0 is
+ * copied to _R0 before STXF(r1, r2); STXF presumably uses _R0 as implicit
+ * index -- confirm against the opcode definition.
+ */
+static void
+_stxr_f(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       movr(_R0, r0);
+       STXF(r1, r2);
+}
+
+/*
+ * Indexed double store of the pair r2/r2+1 using r0 and r1 as address
+ * operands.  With SH_SINGLE_ONLY this falls back to stxr_f.  Otherwise r0
+ * is copied to _R0, r2 + 1 is stored with STXF, _R0 is advanced by 4 and
+ * r2 is stored with a second STXF.
+ */
+static void
+_stxr_d(jit_state_t *_jit, jit_uint16_t r0, jit_uint16_t r1, jit_uint16_t r2)
+{
+       if (SH_SINGLE_ONLY) {
+               stxr_f(r0, r1, r2);
+               return;
+       }
+
+       movr(_R0, r0);
+       STXF(r1, r2 + 1);
+       addi(_R0, _R0, 4);
+       STXF(r1, r2);
+}
+
+/* Immediate-offset form of stxr_f: the offset i0 is placed in _R0. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       movi(_R0, i0);
+       stxr_f(_R0, r0, r1);
+}
+
+/* Immediate-offset form of stxr_d: the offset i0 is placed in _R0. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0, jit_uint16_t r1)
+{
+       movi(_R0, i0);
+       stxr_d(_R0, r0, r1);
+}
+
+/*
+ * Compare r0 and r1 with FCMPEQ (in double mode when is_double is set),
+ * restore the default FPU mode, then emit a branch to i0.  set and p are
+ * forwarded unchanged to emit_branch_opcode.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_beqr_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+       jit_uint16_t r1, jit_bool_t is_double,
+       jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, is_double);
+       FCMPEQ(r0, r1);
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return branch_pc;
+}
+
+/*
+ * Single-precision FCMPEQ of r0 against the immediate i1 (loaded into a
+ * temporary FPR), followed by a branch to i0.  The default FPU mode is
+ * restored before the branch; set and p are forwarded unchanged to
+ * emit_branch_opcode.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_beqi_f(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+       jit_float32_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_uint16_t tmp;
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, 0);
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_f(rn(tmp), i1);
+       FCMPEQ(r0, rn(tmp));
+       jit_unget_reg(tmp);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return branch_pc;
+}
+
+/*
+ * Double-precision FCMPEQ of r0 against the immediate i1 (loaded into a
+ * temporary FPR), followed by a branch to i0.  The default FPU mode is
+ * restored before the branch; set and p are forwarded unchanged to
+ * emit_branch_opcode.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_beqi_d(jit_state_t *_jit, jit_word_t i0, jit_uint16_t r0,
+       jit_float64_t i1, jit_bool_t set, jit_bool_t p)
+{
+       jit_uint16_t tmp;
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, 1);
+
+       tmp = jit_get_reg(jit_class_fpr);
+       movi_d(rn(tmp), i1);
+       FCMPEQ(r0, rn(tmp));
+       jit_unget_reg(tmp);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return branch_pc;
+}
+
+/*
+ * Compare r0 and r1 with FCMPGT (in double mode when is_double is set),
+ * restore the default FPU mode, then emit a branch to i0.  set and p are
+ * forwarded unchanged to emit_branch_opcode.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_bgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_int16_t r1, jit_bool_t is_double,
+       jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, is_double);
+       FCMPGT(r0, r1);
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return branch_pc;
+}
+
+/*
+ * Immediate form of bltr_f_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bltr_f_p returns.
+ */
+static jit_word_t
+_blti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bltr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bltr_d_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bltr_d_p returns.
+ */
+static jit_word_t
+_blti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bltr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bgtr_f_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bgtr_f_p returns.
+ */
+static jit_word_t
+_bgti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bgtr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bgtr_d_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bgtr_d_p returns.
+ */
+static jit_word_t
+_bgti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bgtr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Branch to i0 on a "less than or equal" float comparison of r0 and r1
+ * (double mode when is_double is set).
+ *
+ * The outcomes of FCMPGT(r1, r0) and FCMPEQ(r0, r1) are combined through
+ * _R0 with MOVT/ROTCL and then tested with TSTI(3).  NOTE(review): this
+ * presumably packs both T-bit results into the low two bits of _R0 --
+ * confirm against the opcode definitions.  _R0 is clobbered.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_bler_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_int16_t r1, jit_bool_t is_double,
+       jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       set_fmode(_jit, is_double);
+
+       FCMPGT(r1, r0);
+       MOVT(_R0);
+       FCMPEQ(r0, r1);
+       ROTCL(_R0);
+       TSTI(3);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, set, p);
+
+       return branch_pc;
+}
+
+/*
+ * Immediate form of bler_f_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bler_f_p returns.
+ */
+static jit_word_t
+_blei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bler_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bler_d_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bler_d_p returns.
+ */
+static jit_word_t
+_blei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bler_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bger_f_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bger_f_p returns.
+ */
+static jit_word_t
+_bgei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bger_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bger_d_p: i1 is loaded into a temporary FPR used as
+ * the second comparison operand.  Returns what bger_d_p returns.
+ */
+static jit_word_t
+_bgei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+       jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bger_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Branch form of _uneqr_f: the comparison result is materialised in _R0,
+ * tested with TST(_R0, _R0), the default FPU mode is restored and a
+ * branch to i0 is emitted with the "set" argument fixed to 0.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_buneqr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_int16_t r1, jit_bool_t is_double, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       _uneqr_f(_jit, _R0, r0, r1, is_double);
+       TST(_R0, _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, 0, p);
+
+       return branch_pc;
+}
+
+/*
+ * Branch form of _ltgtr_f: the comparison result is materialised in _R0,
+ * tested with TST(_R0, _R0), the default FPU mode is restored and a
+ * branch to i0 is emitted with the "set" argument fixed to 0.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_bltgtr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_int16_t r1, jit_bool_t is_double, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       _ltgtr_f(_jit, _R0, r0, r1, is_double);
+       TST(_R0, _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, 0, p);
+
+       return branch_pc;
+}
+
+/*
+ * Branch form of _ordr_f: the comparison result is materialised in _R0,
+ * tested with TST(_R0, _R0), the default FPU mode is restored and a
+ * branch to i0 is emitted.  The branch sense passed to
+ * emit_branch_opcode is the logical negation of set.
+ *
+ * Returns the code position recorded just before the branch is emitted.
+ */
+static jit_word_t
+_bordr_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+        jit_int16_t r1, jit_bool_t is_double,
+        jit_bool_t set, jit_bool_t p)
+{
+       jit_word_t branch_pc;
+
+       _ordr_f(_jit, _R0, r0, r1, is_double);
+       TST(_R0, _R0);
+
+       set_fmode(_jit, SH_DEFAULT_FPU_MODE);
+
+       branch_pc = _jit->pc.w;
+       emit_branch_opcode(_jit, i0, branch_pc, !set, p);
+
+       return branch_pc;
+}
+
+/*
+ * Immediate form of bunltr_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunltr_f_p returns.
+ */
+static jit_word_t
+_bunlti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bunltr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunltr_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunltr_d_p returns.
+ */
+static jit_word_t
+_bunlti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bunltr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunler_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunler_f_p returns.
+ */
+static jit_word_t
+_bunlei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bunler_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunler_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunler_d_p returns.
+ */
+static jit_word_t
+_bunlei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bunler_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bungtr_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bungtr_f_p returns.
+ */
+static jit_word_t
+_bungti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bungtr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bungtr_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bungtr_d_p returns.
+ */
+static jit_word_t
+_bungti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bungtr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunger_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunger_f_p returns.
+ */
+static jit_word_t
+_bungei_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bunger_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunger_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunger_d_p returns.
+ */
+static jit_word_t
+_bungei_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bunger_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of buneqr_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what buneqr_f_p returns.
+ */
+static jit_word_t
+_buneqi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = buneqr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of buneqr_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what buneqr_d_p returns.
+ */
+static jit_word_t
+_buneqi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = buneqr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bltgtr_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bltgtr_f_p returns.
+ */
+static jit_word_t
+_bltgti_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bltgtr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bltgtr_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bltgtr_d_p returns.
+ */
+static jit_word_t
+_bltgti_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+         jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bltgtr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bordr_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bordr_f_p returns.
+ */
+static jit_word_t
+_bordi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+        jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bordr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bordr_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bordr_d_p returns.
+ */
+static jit_word_t
+_bordi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+        jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bordr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunordr_f_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunordr_f_p returns.
+ */
+static jit_word_t
+_bunordi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+          jit_float32_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_f(rn(tmp), i1);
+       branch = bunordr_f_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Immediate form of bunordr_d_p: i1 is loaded into a temporary FPR used
+ * as the second comparison operand.  Returns what bunordr_d_p returns.
+ */
+static jit_word_t
+_bunordi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0,
+          jit_float64_t i1, jit_bool_t p)
+{
+       jit_uint16_t tmp = jit_get_reg(jit_class_fpr);
+       jit_word_t branch;
+
+       movi_d(rn(tmp), i1);
+       branch = bunordr_d_p(i0, r0, rn(tmp), p);
+       jit_unget_reg(tmp);
+
+       return (branch);
+}
+
+/*
+ * Float load with address update.  An increment of exactly 4 is emitted
+ * as a single LDFS; any other increment goes through generic_ldxai_f.
+ */
+static void
+_ldxai_f(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_word_t i0)
+{
+    if (i0 != 4) {
+        generic_ldxai_f(r0, r1, i0);
+        return;
+    }
+
+    LDFS(r0, r1);
+}
+
+/*
+ * Double load with address update.  With SH_SINGLE_ONLY this falls back
+ * to ldxai_f.  An increment of exactly 8 is emitted as two LDFS, into
+ * r0 + 1 first and r0 second; any other increment goes through
+ * generic_ldxai_d.
+ */
+static void
+_ldxai_d(jit_state_t *_jit, jit_int16_t r0, jit_int16_t r1, jit_word_t i0)
+{
+       if (SH_SINGLE_ONLY) {
+               ldxai_f(r0, r1, i0);
+               return;
+       }
+
+       if (i0 != 8) {
+               generic_ldxai_d(r0, r1, i0);
+               return;
+       }
+
+       LDFS(r0 + 1, r1);
+       LDFS(r0, r1);
+}
+
+/*
+ * Float store with address update.  An offset of exactly -4 is emitted
+ * as a single STFS; any other offset goes through generic_stxbi_f.
+ */
+static void
+_stxbi_f(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, jit_int16_t r1)
+{
+    if (i0 != -4) {
+        generic_stxbi_f(i0, r0, r1);
+        return;
+    }
+
+    STFS(r0, r1);
+}
+
+/*
+ * Double store with address update.  With SH_SINGLE_ONLY this falls back
+ * to stxbi_f.  An offset of exactly -8 is emitted as two STFS, of r1
+ * first and r1 + 1 second; any other offset goes through
+ * generic_stxbi_d.
+ */
+static void
+_stxbi_d(jit_state_t *_jit, jit_word_t i0, jit_int16_t r0, jit_int16_t r1)
+{
+       if (SH_SINGLE_ONLY) {
+               stxbi_f(i0, r0, r1);
+               return;
+       }
+
+       if (i0 != -8) {
+               generic_stxbi_d(i0, r0, r1);
+               return;
+       }
+
+       STFS(r0, r1);
+       STFS(r0, r1 + 1);
+}
+
+/*
+ * Move the word in r1 into the float register r0: LDS(r1) followed by
+ * FSTS(r0) (presumably a transfer through FPUL -- confirm against the
+ * opcode definitions).
+ */
+static void
+_movr_w_f(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1)
+{
+       LDS(r1);
+       FSTS(r0);
+}
+
+/*
+ * Move the float register r1 into the word register r0: FLDS(r1)
+ * followed by STSUL(r0) (presumably a transfer through FPUL -- confirm
+ * against the opcode definitions).
+ */
+static void
+_movr_f_w(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1)
+{
+       FLDS(r1);
+       STSUL(r0);
+}
+
+/* Immediate form of movr_w_f: the word i0 is first loaded into _R0. */
+static void
+_movi_w_f(jit_state_t *_jit, jit_int16_t r0, jit_word_t i0)
+{
+       movi(_R0, i0);
+       movr_w_f(r0, _R0);
+}
+
+/*
+ * Build the double register pair r0/r0+1 from two words: r1 goes to
+ * r0 + 1 and r2 goes to r0.
+ */
+static void
+_movr_ww_d(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1, jit_int16_t r2)
+{
+       /* TODO: single-only */
+       movr_w_f(r0 + 1, r1);
+       movr_w_f(r0, r2);
+}
+
+/*
+ * Split the double register pair r2/r2+1 into two words: r2 + 1 goes to
+ * r0 and r2 goes to r1.
+ */
+static void
+_movr_d_ww(jit_state_t *_jit, jit_uint16_t r0, jit_int16_t r1, jit_int16_t r2)
+{
+       /* TODO: single-only */
+       movr_f_w(r0, r2 + 1);
+       movr_f_w(r1, r2);
+}
+
+/*
+ * Build the double register pair r0/r0+1 from two immediate words: i1
+ * goes to r0 and i0 goes to r0 + 1.  Each load goes through _R0.
+ */
+static void
+_movi_ww_d(jit_state_t *_jit, jit_int16_t r0, jit_word_t i0, jit_word_t i1)
+{
+       /* TODO: single-only */
+       movi_w_f(r0, i1);
+       movi_w_f(r0 + 1, i0);
+}
+
+/*
+ * Emit code fetching the next double variadic argument into r0, where r1
+ * holds the address of a jit_va_list_t.  Only valid inside a function
+ * declared with varargs (asserted below).
+ *
+ * The emitted code reads the bfpr and efpr fields, falls back to the
+ * "over" field when bfpr has reached efpr, loads the double through the
+ * selected pointer, and writes the advanced pointer back.
+ */
+static void
+_vaarg_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t rg0, rg1;
+    jit_word_t ge_code;
+
+    assert(_jitc->function->self.call & jit_call_varargs);
+
+    rg0 = jit_get_reg(jit_class_gpr);
+    rg1 = jit_get_reg(jit_class_gpr);
+
+    /* Load the begin/end fpr pointers (bfpr into rg0, efpr into rg1).
+     * _R0 is left holding the offset of the bfpr field. */
+    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, efpr));
+    movi(_R0, offsetof(jit_va_list_t, bfpr));
+    ldxr(rn(rg0), r1, _R0);
+
+    /* Check that we didn't reach the end fpr pointer. */
+    CMPHS(rn(rg0), rn(rg1));
+
+    /* Forward branch over the fallback below; its target is patched once
+     * the fallback has been emitted. */
+    ge_code = _jit->pc.w;
+    BF(0);
+
+    /* If we did, load the stack ("over") pointer instead.  _R0 now holds
+     * the offset of the "over" field. */
+    movi(_R0, offsetof(jit_va_list_t, over));
+    ldxr(rn(rg0), r1, _R0);
+
+    patch_at(ge_code, _jit->pc.w);
+
+    /* All good, we can now load the actual value; rg0 is advanced by
+     * sizeof(jit_float64_t). */
+    ldxai_d(r0, rn(rg0), sizeof(jit_float64_t));
+
+    /* Write the advanced pointer (fpr or stack) back to the field it was
+     * read from: _R0 still holds that field's offset on either path.
+     * NOTE(review): this relies on ldxai_d not clobbering _R0 -- confirm
+     * for the generic_ldxai_* fallbacks. */
+    stxr(_R0, r1, rn(rg0));
+
+    jit_unget_reg(rg0);
+    jit_unget_reg(rg1);
+}
+
+#endif /* CODE */
diff --git a/deps/lightning/lib/jit_sh-sz.c b/deps/lightning/lib/jit_sh-sz.c
new file mode 100644 (file)
index 0000000..0b02cbf
--- /dev/null
@@ -0,0 +1,598 @@
+#define JIT_INSTR_MAX 116
+    0, /* data */
+    0, /* live */
+    4, /* align */
+    0, /* save */
+    0, /* load */
+    4, /* skip */
+    0, /* #name */
+    0, /* #note */
+    0, /* label */
+    40,        /* prolog */
+    0, /* ellipsis */
+    0, /* va_push */
+    0, /* allocai */
+    0, /* allocar */
+    0, /* arg_c */
+    0, /* arg_s */
+    0, /* arg_i */
+    0, /* arg_l */
+    0, /* getarg_c */
+    0, /* getarg_uc */
+    0, /* getarg_s */
+    0, /* getarg_us */
+    0, /* getarg_i */
+    0, /* getarg_ui */
+    0, /* getarg_l */
+    0, /* putargr_c */
+    0, /* putargi_c */
+    0, /* putargr_uc */
+    0, /* putargi_uc */
+    0, /* putargr_s */
+    0, /* putargi_s */
+    0, /* putargr_us */
+    0, /* putargi_us */
+    0, /* putargr_i */
+    0, /* putargi_i */
+    0, /* putargr_ui */
+    0, /* putargi_ui */
+    0, /* putargr_l */
+    0, /* putargi_l */
+    4, /* va_start */
+    8, /* va_arg */
+    8, /* va_arg_d */
+    0, /* va_end */
+    4, /* addr */
+    10,        /* addi */
+    6, /* addcr */
+    12,        /* addci */
+    4, /* addxr */
+    10,        /* addxi */
+    4, /* subr */
+    10,        /* subi */
+    8, /* subcr */
+    14,        /* subci */
+    6, /* subxr */
+    12,        /* subxi */
+    10,        /* rsbi */
+    4, /* mulr */
+    10,        /* muli */
+    6, /* qmulr */
+    12,        /* qmuli */
+    6, /* qmulr_u */
+    12,        /* qmuli_u */
+    34,        /* divr */
+    40,        /* divi */
+    24,        /* divr_u */
+    30,        /* divi_u */
+    44,        /* qdivr */
+    50,        /* qdivi */
+    34,        /* qdivr_u */
+    40,        /* qdivi_u */
+    44,        /* remr */
+    50,        /* remi */
+    34,        /* remr_u */
+    40,        /* remi_u */
+    4, /* andr */
+    10,        /* andi */
+    4, /* orr */
+    10,        /* ori */
+    4, /* xorr */
+    10,        /* xori */
+#  if defined(__SH3__) || defined(__SH4__) || defined(__SH4_NOFPU__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+    6, /* lshr */
+    12,        /* lshi */
+    6, /* rshr */
+    12,        /* rshi */
+    6, /* rshr_u */
+    12,        /* rshi_u */
+#else
+    16,        /* lshr */
+    22,        /* lshi */
+    14,        /* rshr */
+    20,        /* rshi */
+    14,        /* rshr_u */
+    20,        /* rshi_u */
+#endif
+    2, /* negr */
+    4, /* negi */
+    2, /* comr */
+    4, /* comi */
+    4, /* ltr */
+    4, /* lti */
+    4, /* ltr_u */
+    4, /* lti_u */
+    4, /* ler */
+    10,        /* lei */
+    4, /* ler_u */
+    10,        /* lei_u */
+    4, /* eqr */
+    10,        /* eqi */
+    4, /* ger */
+    10,        /* gei */
+    4, /* ger_u */
+    10,        /* gei_u */
+    4, /* gtr */
+    10,        /* gti */
+    4, /* gtr_u */
+    10,        /* gti_u */
+    6, /* ner */
+    12,        /* nei */
+    2, /* movr */
+    6, /* movi */
+    6, /* movnr */
+    6, /* movzr */
+    24,        /* casr */
+    30,        /* casi */
+    2, /* extr_c */
+    4, /* exti_c */
+    2, /* extr_uc */
+    4, /* exti_uc */
+    2, /* extr_s */
+    4, /* exti_s */
+    2, /* extr_us */
+    4, /* exti_us */
+    4, /* extr_i */
+    0, /* exti_i */
+    8, /* extr_ui */
+    0, /* exti_ui */
+    4, /* bswapr_us */
+    4, /* bswapi_us */
+    6, /* bswapr_ui */
+    8, /* bswapi_ui */
+    0, /* bswapr_ul */
+    0, /* bswapi_ul */
+    4, /* htonr_us */
+    4, /* htoni_us */
+    6, /* htonr_ui */
+    8, /* htoni_ui */
+    0, /* htonr_ul */
+    0, /* htoni_ul */
+    2, /* ldr_c */
+    12,        /* ldi_c */
+    4, /* ldr_uc */
+    12,        /* ldi_uc */
+    2, /* ldr_s */
+    12,        /* ldi_s */
+    4, /* ldr_us */
+    12,        /* ldi_us */
+    2, /* ldr_i */
+    12,        /* ldi_i */
+    2, /* ldr_ui */
+    12,        /* ldi_ui */
+    0, /* ldr_l */
+    0, /* ldi_l */
+    4, /* ldxr_c */
+    16,        /* ldxi_c */
+    6, /* ldxr_uc */
+    16,        /* ldxi_uc */
+    4, /* ldxr_s */
+    16,        /* ldxi_s */
+    6, /* ldxr_us */
+    16,        /* ldxi_us */
+    4, /* ldxr_i */
+    16,        /* ldxi_i */
+    4, /* ldxr_ui */
+    16,        /* ldxi_ui */
+    0, /* ldxr_l */
+    0, /* ldxi_l */
+    2, /* str_c */
+    12,        /* sti_c */
+    2, /* str_s */
+    12,        /* sti_s */
+    2, /* str_i */
+    12,        /* sti_i */
+    0, /* str_l */
+    0, /* sti_l */
+    4, /* stxr_c */
+    16,        /* stxi_c */
+    4, /* stxr_s */
+    16,        /* stxi_s */
+    4, /* stxr_i */
+    16,        /* stxi_i */
+    0, /* stxr_l */
+    0, /* stxi_l */
+    6, /* bltr */
+    8, /* blti */
+    6, /* bltr_u */
+    8, /* blti_u */
+    6, /* bler */
+    8, /* blei */
+    6, /* bler_u */
+    8, /* blei_u */
+    6, /* beqr */
+    28,        /* beqi */
+    6, /* bger */
+    8, /* bgei */
+    6, /* bger_u */
+    8, /* bgei_u */
+    6, /* bgtr */
+    8, /* bgti */
+    6, /* bgtr_u */
+    8, /* bgti_u */
+    6, /* bner */
+    20,        /* bnei */
+    6, /* bmsr */
+    12,        /* bmsi */
+    6, /* bmcr */
+    12,        /* bmci */
+    8, /* boaddr */
+    36,        /* boaddi */
+    8, /* boaddr_u */
+    20,        /* boaddi_u */
+    8, /* bxaddr */
+    36,        /* bxaddi */
+    8, /* bxaddr_u */
+    20,        /* bxaddi_u */
+    10,        /* bosubr */
+    36,        /* bosubi */
+    8, /* bosubr_u */
+    20,        /* bosubi_u */
+    10,        /* bxsubr */
+    36,        /* bxsubi */
+    8, /* bxsubr_u */
+    20,        /* bxsubi_u */
+    4, /* jmpr */
+    10,        /* jmpi */
+    4, /* callr */
+    10,        /* calli */
+    0, /* prepare */
+    0, /* pushargr_c */
+    0, /* pushargi_c */
+    0, /* pushargr_uc */
+    0, /* pushargi_uc */
+    0, /* pushargr_s */
+    0, /* pushargi_s */
+    0, /* pushargr_us */
+    0, /* pushargi_us */
+    0, /* pushargr_i */
+    0, /* pushargi_i */
+    0, /* pushargr_ui */
+    0, /* pushargi_ui */
+    0, /* pushargr_l */
+    0, /* pushargi_l */
+    0, /* finishr */
+    0, /* finishi */
+    0, /* ret */
+    0, /* retr_c */
+    0, /* reti_c */
+    0, /* retr_uc */
+    0, /* reti_uc */
+    0, /* retr_s */
+    0, /* reti_s */
+    0, /* retr_us */
+    0, /* reti_us */
+    0, /* retr_i */
+    0, /* reti_i */
+    0, /* retr_ui */
+    0, /* reti_ui */
+    0, /* retr_l */
+    0, /* reti_l */
+    0, /* retval_c */
+    0, /* retval_uc */
+    0, /* retval_s */
+    0, /* retval_us */
+    0, /* retval_i */
+    0, /* retval_ui */
+    0, /* retval_l */
+    22,        /* epilog */
+    0, /* arg_f */
+    0, /* getarg_f */
+    0, /* putargr_f */
+    0, /* putargi_f */
+    4, /* addr_f */
+    12,        /* addi_f */
+    4, /* subr_f */
+    12,        /* subi_f */
+    12,        /* rsbi_f */
+    4, /* mulr_f */
+    12,        /* muli_f */
+    4, /* divr_f */
+    12,        /* divi_f */
+    4, /* negr_f */
+    0, /* negi_f */
+    4, /* absr_f */
+    0, /* absi_f */
+    4, /* sqrtr_f */
+    0, /* sqrti_f */
+    4, /* ltr_f */
+    12,        /* lti_f */
+    4, /* ler_f */
+    12,        /* lei_f */
+    4, /* eqr_f */
+    12,        /* eqi_f */
+    4, /* ger_f */
+    12,        /* gei_f */
+    4, /* gtr_f */
+    12,        /* gti_f */
+    8, /* ner_f */
+    16,        /* nei_f */
+    28,        /* unltr_f */
+    36,        /* unlti_f */
+    28,        /* unler_f */
+    36,        /* unlei_f */
+    28,        /* uneqr_f */
+    36,        /* uneqi_f */
+    28,        /* unger_f */
+    36,        /* ungei_f */
+    28,        /* ungtr_f */
+    36,        /* ungti_f */
+    40,        /* ltgtr_f */
+    48,        /* ltgti_f */
+    28,        /* ordr_f */
+    36,        /* ordi_f */
+    20,        /* unordr_f */
+    28,        /* unordi_f */
+    4, /* truncr_f_i */
+    4, /* truncr_f_l */
+    4, /* extr_f */
+    4, /* extr_d_f */
+    4, /* movr_f */
+    8, /* movi_f */
+    4, /* ldr_f */
+    12,        /* ldi_f */
+    8, /* ldxr_f */
+    16,        /* ldxi_f */
+    4, /* str_f */
+    12,        /* sti_f */
+    8, /* stxr_f */
+    16,        /* stxi_f */
+    8, /* bltr_f */
+    16,        /* blti_f */
+    8, /* bler_f */
+    16,        /* blei_f */
+    8, /* beqr_f */
+    16,        /* beqi_f */
+    8, /* bger_f */
+    16,        /* bgei_f */
+    8, /* bgtr_f */
+    16,        /* bgti_f */
+    8, /* bner_f */
+    16,        /* bnei_f */
+    32,        /* bunltr_f */
+    40,        /* bunlti_f */
+    32,        /* bunler_f */
+    40,        /* bunlei_f */
+    32,        /* buneqr_f */
+    40,        /* buneqi_f */
+    32,        /* bunger_f */
+    40,        /* bungei_f */
+    32,        /* bungtr_f */
+    40,        /* bungti_f */
+    44,        /* bltgtr_f */
+    52,        /* bltgti_f */
+    32,        /* bordr_f */
+    40,        /* bordi_f */
+    24,        /* bunordr_f */
+    32,        /* bunordi_f */
+    0, /* pushargr_f */
+    0, /* pushargi_f */
+    0, /* retr_f */
+    0, /* reti_f */
+    0, /* retval_f */
+    0, /* arg_d */
+    0, /* getarg_d */
+    0, /* putargr_d */
+    0, /* putargi_d */
+    4, /* addr_d */
+    24,        /* addi_d */
+    4, /* subr_d */
+    24,        /* subi_d */
+    24,        /* rsbi_d */
+    4, /* mulr_d */
+    24,        /* muli_d */
+    4, /* divr_d */
+    24,        /* divi_d */
+    4, /* negr_d */
+    0, /* negi_d */
+    4, /* absr_d */
+    0, /* absi_d */
+    4, /* sqrtr_d */
+    0, /* sqrti_d */
+    4, /* ltr_d */
+    24,        /* lti_d */
+    4, /* ler_d */
+    24,        /* lei_d */
+    4, /* eqr_d */
+    24,        /* eqi_d */
+    4, /* ger_d */
+    24,        /* gei_d */
+    4, /* gtr_d */
+    24,        /* gti_d */
+    8, /* ner_d */
+    28,        /* nei_d */
+    28,        /* unltr_d */
+    48,        /* unlti_d */
+    28,        /* unler_d */
+    48,        /* unlei_d */
+    28,        /* uneqr_d */
+    48,        /* uneqi_d */
+    28,        /* unger_d */
+    48,        /* ungei_d */
+    28,        /* ungtr_d */
+    48,        /* ungti_d */
+    40,        /* ltgtr_d */
+    60,        /* ltgti_d */
+    28,        /* ordr_d */
+    48,        /* ordi_d */
+    20,        /* unordr_d */
+    40,        /* unordi_d */
+    4, /* truncr_d_i */
+    4, /* truncr_d_l */
+    4, /* extr_d */
+    4, /* extr_f_d */
+    4, /* movr_d */
+    20,        /* movi_d */
+    4, /* ldr_d */
+    12,        /* ldi_d */
+    8, /* ldxr_d */
+    16,        /* ldxi_d */
+    4, /* str_d */
+    12,        /* sti_d */
+    8, /* stxr_d */
+    16,        /* stxi_d */
+    8, /* bltr_d */
+    28,        /* blti_d */
+    8, /* bler_d */
+    28,        /* blei_d */
+    8, /* beqr_d */
+    28,        /* beqi_d */
+    8, /* bger_d */
+    28,        /* bgei_d */
+    8, /* bgtr_d */
+    28,        /* bgti_d */
+    8, /* bner_d */
+    28,        /* bnei_d */
+    32,        /* bunltr_d */
+    52,        /* bunlti_d */
+    32,        /* bunler_d */
+    52,        /* bunlei_d */
+    32,        /* buneqr_d */
+    52,        /* buneqi_d */
+    32,        /* bunger_d */
+    52,        /* bungei_d */
+    32,        /* bungtr_d */
+    52,        /* bungti_d */
+    44,        /* bltgtr_d */
+    64,        /* bltgti_d */
+    32,        /* bordr_d */
+    52,        /* bordi_d */
+    24,        /* bunordr_d */
+    44,        /* bunordi_d */
+    0, /* pushargr_d */
+    0, /* pushargi_d */
+    0, /* retr_d */
+    0, /* reti_d */
+    0, /* retval_d */
+    4, /* movr_w_f */
+    8, /* movi_w_f */
+    0, /* movr_ww_d */
+    16,        /* movi_ww_d */
+    4, /* movr_w_d */
+    0, /* movi_w_d */
+    0, /* movr_f_w */
+    4, /* movi_f_w */
+    0, /* movr_d_ww */
+    0, /* movi_d_ww */
+    4, /* movr_d_w */
+    16,        /* movi_d_w */
+    10,        /* clor */
+    6, /* cloi */
+    12,        /* clzr */
+    6, /* clzi */
+    10,        /* ctor */
+    6, /* ctoi */
+    12,        /* ctzr */
+    6, /* ctzi */
+    14,        /* rbitr */
+    6, /* rbiti */
+    14,        /* popcntr */
+    6, /* popcnti */
+    14,        /* lrotr */
+    14,        /* lroti */
+    14,        /* rrotr */
+    14,        /* rroti */
+    8, /* extr */
+    6, /* exti */
+    4, /* extr_u */
+    6, /* exti_u */
+    4, /* depr */
+    10,        /* depi */
+    18,        /* qlshr */
+    8, /* qlshi */
+    18,        /* qlshr_u */
+    8, /* qlshi_u */
+    18,        /* qrshr */
+    8, /* qrshi */
+    18,        /* qrshr_u */
+    8, /* qrshi_u */
+    16,        /* unldr */
+    20,        /* unldi */
+    16,        /* unldr_u */
+    20,        /* unldi_u */
+    44,        /* unstr */
+    28,        /* unsti */
+    32,        /* unldr_x */
+    40,        /* unldi_x */
+    28,        /* unstr_x */
+    40,        /* unsti_x */
+    4, /* fmar_f */
+    0, /* fmai_f */
+    4, /* fmsr_f */
+    0, /* fmsi_f */
+    4, /* fmar_d */
+    0, /* fmai_d */
+    4, /* fmsr_d */
+    0, /* fmsi_d */
+    4, /* fnmar_f */
+    0, /* fnmai_f */
+    4, /* fnmsr_f */
+    0, /* fnmsi_f */
+    4, /* fnmar_d */
+    0, /* fnmai_d */
+    4, /* fnmsr_d */
+    0, /* fnmsi_d */
+    8, /* hmulr */
+    16,        /* hmuli */
+    8, /* hmulr_u */
+    16,        /* hmuli_u */
+    8, /* ldxbr_c */
+    18,        /* ldxbi_c */
+    12, /* ldxar_c */
+    18,        /* ldxai_c */
+    4, /* ldxbr_uc */
+    18,        /* ldxbi_uc */
+    8, /* ldxar_uc */
+    18,        /* ldxai_uc */
+    4, /* ldxbr_s */
+    18,        /* ldxbi_s */
+    8, /* ldxar_s */
+    18,        /* ldxai_s */
+    4, /* ldxbr_us */
+    18,        /* ldxbi_us */
+    8, /* ldxar_us */
+    18,        /* ldxai_us */
+    4, /* ldxbr_i */
+    18,        /* ldxbi_i */
+    8, /* ldxar_i */
+    18,        /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    4, /* ldxbr_f */
+    18,        /* ldxbi_f */
+    8, /* ldxar_f */
+    18,        /* ldxai_f */
+    4, /* ldxbr_d */
+    18, /* ldxbi_d */
+    8, /* ldxar_d */
+    18, /* ldxai_d */
+    4, /* stxbr_c */
+    18,        /* stxbi_c */
+    8, /* stxar_c */
+    18,        /* stxai_c */
+    4, /* stxbr_s */
+    18,        /* stxbi_s */
+    8, /* stxar_s */
+    18,        /* stxai_s */
+    4, /* stxbr_i */
+    18,        /* stxbi_i */
+    8, /* stxar_i */
+    18,        /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    4, /* stxbr_f */
+    18,        /* stxbi_f */
+    8, /* stxar_f */
+    18,        /* stxai_f */
+    4, /* stxbr_d */
+    18, /* stxbi_d */
+    8, /* stxar_d */
+    18, /* stxai_d */
diff --git a/deps/lightning/lib/jit_sh.c b/deps/lightning/lib/jit_sh.c
new file mode 100644 (file)
index 0000000..9806f14
--- /dev/null
@@ -0,0 +1,2215 @@
+/*
+ * Copyright (C) 2022  Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ *     Paul Cercueil
+ */
+
+/* Argument-passing parameters used by this backend: number of argument
+ * slots handled as integer / floating-point registers, and the size of
+ * one stack slot in bytes together with its log2. */
+#  define NUM_WORD_ARGS                        4
+#  define NUM_FLOAT_ARGS               8
+#  define STACK_SLOT                   4
+#  define STACK_SHIFT                  2
+
+/* True when argument index `i` falls inside the register-passed range. */
+#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < NUM_WORD_ARGS)
+#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < NUM_FLOAT_ARGS)
+
+#define fpr_args_inverted()            (__BYTE_ORDER == __LITTLE_ENDIAN && !SH_SINGLE_ONLY)
+
+/* Byte displacement of an 8/16/32-bit value inside a stack slot: zero on
+ * little-endian, slot size minus value size on big-endian.  The
+ * replacement lists are parenthesized so the macros expand safely inside
+ * larger expressions (e.g. when multiplied or subtracted). */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define C_DISP                       0
+#  define S_DISP                       0
+#  define I_DISP                       0
+#else
+#  define C_DISP                       (STACK_SLOT - sizeof(jit_int8_t))
+#  define S_DISP                       (STACK_SLOT - sizeof(jit_int16_t))
+#  define I_DISP                       (STACK_SLOT - sizeof(jit_int32_t))
+#endif
+
+/* Forward declarations of the file-local helpers.  Each helper is paired
+ * with a wrapper macro that supplies the `_jit` state as first argument,
+ * so call sites can omit it. */
+#define jit_make_arg(node,code)                _jit_make_arg(_jit,node,code)
+static jit_node_t *_jit_make_arg(jit_state_t*,jit_node_t*,jit_code_t);
+#define jit_make_arg_f(node)           _jit_make_arg_f(_jit,node)
+static jit_node_t *_jit_make_arg_f(jit_state_t*,jit_node_t*);
+#define jit_make_arg_d(node)           _jit_make_arg_d(_jit,node)
+static jit_node_t *_jit_make_arg_d(jit_state_t*,jit_node_t*);
+#define load_const(uniq,r0,i0)         _load_const(_jit,uniq,r0,i0)
+static void _load_const(jit_state_t*,jit_bool_t,jit_int32_t,jit_word_t);
+#define load_const_f(uniq,r0,i0)       _load_const_f(_jit,uniq,r0,i0)
+static void _load_const_f(jit_state_t*,jit_bool_t,jit_int32_t,jit_float32_t);
+#define flush_consts(force)            _flush_consts(_jit,force)
+static void _flush_consts(jit_state_t*,jit_bool_t);
+#define invalidate_consts()            _invalidate_consts(_jit)
+static void _invalidate_consts(jit_state_t*);
+#define patch(instr, node)             _patch(_jit, instr, node)
+static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
+
+#define PROTO 1
+#  include "jit_rewind.c"
+#  include "jit_sh-cpu.c"
+#  include "jit_sh-fpu.c"
+#  include "jit_fallback.c"
+#undef PROTO
+
+/*
+ * Register description table: one entry per register, pairing a spec
+ * word (class flags built with rc() OR-ed with the register number) and
+ * the register's printable name.  It is indexed by register number
+ * (see the `_rvs[regno].spec` lookup in _jit_regarg_p).
+ *
+ * As written below:
+ *   - r0 and gbr carry no class flags;
+ *   - r1-r3 are gpr, r4-r7 are arg|gpr, r8-r13 are sav|gpr;
+ *   - r14 and r15 are sav only (no gpr flag);
+ *   - only even-numbered $fN entries carry the fpr flag;
+ *   - $xf12-$xf15 are sav, the remaining $xfN carry no class flags.
+ */
+jit_register_t _rvs[] = {
+    { 0x0,                             "r0" },
+    { rc(gpr) | 0x1,                   "r1" },
+    { rc(gpr) | 0x2,                   "r2" },
+    { rc(gpr) | 0x3,                   "r3" },
+    { rc(arg) | rc(gpr) | 0x4,         "r4" },
+    { rc(arg) | rc(gpr) | 0x5,         "r5" },
+    { rc(arg) | rc(gpr) | 0x6,         "r6" },
+    { rc(arg) | rc(gpr) | 0x7,         "r7" },
+    { rc(sav) | rc(gpr) | 0x8,         "r8" },
+    { rc(sav) | rc(gpr) | 0x9,         "r9" },
+    { rc(sav) | rc(gpr) | 0xa,         "r10" },
+    { rc(sav) | rc(gpr) | 0xb,         "r11" },
+    { rc(sav) | rc(gpr) | 0xc,         "r12" },
+    { rc(sav) | rc(gpr) | 0xd,         "r13" },
+    { rc(sav) | 0xe,                   "r14" },
+    { rc(sav) | 0xf,                   "r15" },
+    { 0x10,                            "gbr" },
+
+    /* Only use half of the floating-point registers.
+     * This makes it much easier to switch between
+     * float and double processing. */
+    { rc(fpr) | 0x0,                   "$f0" },
+    { 0x1,                             "$f1" },
+    { rc(fpr) | 0x2,                   "$f2" },
+    { 0x3,                             "$f3" },
+    { rc(fpr) | 0x4,                   "$f4" },
+    { 0x5,                             "$f5" },
+    { rc(fpr) | 0x6,                   "$f6" },
+    { 0x7,                             "$f7" },
+    { rc(fpr) | 0x8,                   "$f8" },
+    { 0x9,                             "$f9" },
+    { rc(fpr) | 0xa,                   "$f10" },
+    { 0xb,                             "$f11" },
+    { rc(fpr) | 0xc,                   "$f12" },
+    { 0xd,                             "$f13" },
+    { rc(fpr) | 0xe,                   "$f14" },
+    { 0xf,                             "$f15" },
+
+    /* Extended floating-point bank; none of these carry gpr/fpr flags. */
+    { _XF0,                            "$xf0" },
+    { _XF1,                            "$xf1" },
+    { _XF2,                            "$xf2" },
+    { _XF3,                            "$xf3" },
+    { _XF4,                            "$xf4" },
+    { _XF5,                            "$xf5" },
+    { _XF6,                            "$xf6" },
+    { _XF7,                            "$xf7" },
+    { _XF8,                            "$xf8" },
+    { _XF9,                            "$xf9" },
+    { _XF10,                           "$xf10" },
+    { _XF11,                           "$xf11" },
+    { rc(sav) | _XF12,                 "$xf12" },
+    { rc(sav) | _XF13,                 "$xf13" },
+    { rc(sav) | _XF14,                 "$xf14" },
+    { rc(sav) | _XF15,                 "$xf15" },
+};
+
+/*
+ * Backend-specific variadic argument cursor made of five pointers.
+ * NOTE(review): field roles are inferred from the names only --
+ * presumably begin/end of the saved GPR argument area, begin/end of the
+ * saved FPR argument area, and a pointer to the overflow (stack)
+ * arguments; confirm against the va_start/va_arg implementation.
+ */
+typedef struct jit_va_list {
+    jit_pointer_t      bgpr;
+    jit_pointer_t      egpr;
+    jit_pointer_t      bfpr;
+    jit_pointer_t      efpr;
+    jit_pointer_t      over;
+} jit_va_list_t;
+
+/*
+ * Tell whether `code` is one of the floating-point related opcodes
+ * enumerated below.
+ *
+ * Returns 1 for every listed jit_code_* value (float/double arguments,
+ * return values, moves, conversions, arithmetic, comparisons, branches,
+ * loads/stores and fused multiply-add variants) and 0 for anything else.
+ *
+ * NOTE(review): jit_code_unldr_x / unldi_x / unstr_x / unsti_x and
+ * jit_code_movr_d_ww / movi_d_ww do not appear in this list -- confirm
+ * whether leaving them out is intentional.
+ */
+static jit_bool_t jit_uses_fpu(jit_code_t code)
+{
+       switch (code) {
+       case jit_code_retr_f:
+       case jit_code_retr_d:
+       case jit_code_pushargr_f:
+       case jit_code_pushargr_d:
+       case jit_code_reti_f:
+       case jit_code_pushargi_f:
+       case jit_code_reti_d:
+       case jit_code_pushargi_d:
+       case jit_code_arg_f:
+       case jit_code_arg_d:
+       case jit_code_retval_f:
+       case jit_code_retval_d:
+       case jit_code_getarg_f:
+       case jit_code_getarg_d:
+       case jit_code_putargr_f:
+       case jit_code_putargr_d:
+       case jit_code_putargi_f:
+       case jit_code_putargi_d:
+       case jit_code_ldi_f:
+       case jit_code_ldi_d:
+       case jit_code_movi_w_f:
+       case jit_code_movi_w_d:
+       case jit_code_movi_ww_d:
+       case jit_code_movi_f:
+       case jit_code_movi_f_w:
+       case jit_code_negi_f:
+       case jit_code_absi_f:
+       case jit_code_sqrti_f:
+       case jit_code_movi_d:
+       case jit_code_movi_d_w:
+       case jit_code_negi_d:
+       case jit_code_absi_d:
+       case jit_code_sqrti_d:
+       case jit_code_truncr_f_i:
+       case jit_code_truncr_f_l:
+       case jit_code_truncr_d_i:
+       case jit_code_truncr_d_l:
+       case jit_code_negr_f:
+       case jit_code_absr_f:
+       case jit_code_sqrtr_f:
+       case jit_code_movr_f:
+       case jit_code_extr_f:
+       case jit_code_extr_d_f:
+       case jit_code_ldr_f:
+       case jit_code_negr_d:
+       case jit_code_absr_d:
+       case jit_code_sqrtr_d:
+       case jit_code_movr_d:
+       case jit_code_extr_d:
+       case jit_code_extr_f_d:
+       case jit_code_ldr_d:
+       case jit_code_movr_w_f:
+       case jit_code_movr_f_w:
+       case jit_code_movr_w_d:
+       case jit_code_movr_d_w:
+       case jit_code_va_arg_d:
+       case jit_code_ldxi_f:
+       case jit_code_ldxi_d:
+       case jit_code_addi_f:
+       case jit_code_subi_f:
+       case jit_code_rsbi_f:
+       case jit_code_muli_f:
+       case jit_code_divi_f:
+       case jit_code_lti_f:
+       case jit_code_lei_f:
+       case jit_code_eqi_f:
+       case jit_code_gei_f:
+       case jit_code_gti_f:
+       case jit_code_nei_f:
+       case jit_code_unlti_f:
+       case jit_code_unlei_f:
+       case jit_code_uneqi_f:
+       case jit_code_ungei_f:
+       case jit_code_ungti_f:
+       case jit_code_ltgti_f:
+       case jit_code_ordi_f:
+       case jit_code_unordi_f:
+       case jit_code_addi_d:
+       case jit_code_subi_d:
+       case jit_code_rsbi_d:
+       case jit_code_muli_d:
+       case jit_code_divi_d:
+       case jit_code_lti_d:
+       case jit_code_lei_d:
+       case jit_code_eqi_d:
+       case jit_code_gei_d:
+       case jit_code_gti_d:
+       case jit_code_nei_d:
+       case jit_code_unlti_d:
+       case jit_code_unlei_d:
+       case jit_code_uneqi_d:
+       case jit_code_ungei_d:
+       case jit_code_ungti_d:
+       case jit_code_ltgti_d:
+       case jit_code_ordi_d:
+       case jit_code_unordi_d:
+       case jit_code_addr_f:
+       case jit_code_subr_f:
+       case jit_code_mulr_f:
+       case jit_code_divr_f:
+       case jit_code_ltr_f:
+       case jit_code_ler_f:
+       case jit_code_eqr_f:
+       case jit_code_ger_f:
+       case jit_code_gtr_f:
+       case jit_code_ner_f:
+       case jit_code_unltr_f:
+       case jit_code_unler_f:
+       case jit_code_uneqr_f:
+       case jit_code_unger_f:
+       case jit_code_ungtr_f:
+       case jit_code_ltgtr_f:
+       case jit_code_ordr_f:
+       case jit_code_unordr_f:
+       case jit_code_ldxr_f:
+       case jit_code_addr_d:
+       case jit_code_subr_d:
+       case jit_code_mulr_d:
+       case jit_code_divr_d:
+       case jit_code_ltr_d:
+       case jit_code_ler_d:
+       case jit_code_eqr_d:
+       case jit_code_ger_d:
+       case jit_code_gtr_d:
+       case jit_code_ner_d:
+       case jit_code_unltr_d:
+       case jit_code_unler_d:
+       case jit_code_uneqr_d:
+       case jit_code_unger_d:
+       case jit_code_ungtr_d:
+       case jit_code_ltgtr_d:
+       case jit_code_ordr_d:
+       case jit_code_unordr_d:
+       case jit_code_ldxr_d:
+       case jit_code_movr_ww_d:
+       case jit_code_sti_f:
+       case jit_code_sti_d:
+       case jit_code_blti_f:
+       case jit_code_blei_f:
+       case jit_code_beqi_f:
+       case jit_code_bgei_f:
+       case jit_code_bgti_f:
+       case jit_code_bnei_f:
+       case jit_code_bunlti_f:
+       case jit_code_bunlei_f:
+       case jit_code_buneqi_f:
+       case jit_code_bungei_f:
+       case jit_code_bungti_f:
+       case jit_code_bltgti_f:
+       case jit_code_bordi_f:
+       case jit_code_bunordi_f:
+       case jit_code_blti_d:
+       case jit_code_blei_d:
+       case jit_code_beqi_d:
+       case jit_code_bgei_d:
+       case jit_code_bgti_d:
+       case jit_code_bnei_d:
+       case jit_code_bunlti_d:
+       case jit_code_bunlei_d:
+       case jit_code_buneqi_d:
+       case jit_code_bungei_d:
+       case jit_code_bungti_d:
+       case jit_code_bltgti_d:
+       case jit_code_bordi_d:
+       case jit_code_bunordi_d:
+       case jit_code_str_f:
+       case jit_code_str_d:
+       case jit_code_stxi_f:
+       case jit_code_stxi_d:
+       case jit_code_bltr_f:
+       case jit_code_bler_f:
+       case jit_code_beqr_f:
+       case jit_code_bger_f:
+       case jit_code_bgtr_f:
+       case jit_code_bner_f:
+       case jit_code_bunltr_f:
+       case jit_code_bunler_f:
+       case jit_code_buneqr_f:
+       case jit_code_bunger_f:
+       case jit_code_bungtr_f:
+       case jit_code_bltgtr_f:
+       case jit_code_bordr_f:
+       case jit_code_bunordr_f:
+       case jit_code_bltr_d:
+       case jit_code_bler_d:
+       case jit_code_beqr_d:
+       case jit_code_bger_d:
+       case jit_code_bgtr_d:
+       case jit_code_bner_d:
+       case jit_code_bunltr_d:
+       case jit_code_bunler_d:
+       case jit_code_buneqr_d:
+       case jit_code_bunger_d:
+       case jit_code_bungtr_d:
+       case jit_code_bltgtr_d:
+       case jit_code_bordr_d:
+       case jit_code_bunordr_d:
+       case jit_code_stxr_f:
+       case jit_code_stxr_d:
+       case jit_code_fmar_f:
+       case jit_code_fmar_d:
+       case jit_code_fmsr_f:
+       case jit_code_fmsr_d:
+       case jit_code_fnmar_f:
+       case jit_code_fnmar_d:
+       case jit_code_fnmsr_f:
+       case jit_code_fnmsr_d:
+       case jit_code_fmai_f:
+       case jit_code_fmsi_f:
+       case jit_code_fnmai_f:
+       case jit_code_fnmsi_f:
+       case jit_code_fmai_d:
+       case jit_code_fmsi_d:
+       case jit_code_fnmai_d:
+       case jit_code_fnmsi_d:
+       case jit_code_ldxbi_f:
+       case jit_code_ldxai_f:
+       case jit_code_ldxbi_d:
+       case jit_code_ldxai_d:
+       case jit_code_ldxbr_f:
+       case jit_code_ldxar_f:
+       case jit_code_ldxbr_d:
+       case jit_code_ldxar_d:
+       case jit_code_stxbi_f:
+       case jit_code_stxai_f:
+       case jit_code_stxbi_d:
+       case jit_code_stxai_d:
+       case jit_code_stxbr_f:
+       case jit_code_stxar_f:
+       case jit_code_stxbr_d:
+       case jit_code_stxar_d:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/* CPU probing hook; this backend's implementation is intentionally a
+ * no-op (the body is empty). */
+void
+jit_get_cpu(void)
+{
+}
+
+/*
+ * Per-state initialisation: record the register count derived from the
+ * _rvs table in the compiler state.
+ *
+ * NOTE(review): assuming jit_size() yields the element count, the "- 1"
+ * leaves the last _rvs entry ($xf15) out of reglen, because the table
+ * as written has no trailing sentinel entry -- confirm this is intended.
+ */
+void
+_jit_init(jit_state_t *_jit)
+{
+       _jitc->reglen = jit_size(_rvs) - 1;
+}
+
+/*
+ * Open a new function: close any function still in progress, reserve an
+ * entry in the functions vector, reset its argument/stack bookkeeping and
+ * create the prolog (linked immediately) and epilog (linked later by
+ * _jit_epilog) nodes.
+ */
+void
+_jit_prolog(jit_state_t *_jit)
+{
+    jit_int32_t                 index;
+    jit_function_t     *func;
+
+    /* A still-open function is terminated first. */
+    if (_jitc->function)
+       jit_epilog();
+
+    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
+
+    jit_regset_set_ui(&_jitc->regsav, 0);
+
+    /* Grow the functions vector by 16 entries whenever it is full. */
+    index = _jitc->functions.offset;
+    if (index >= _jitc->functions.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
+                   _jitc->functions.length * sizeof(jit_function_t),
+                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
+       _jitc->functions.length += 16;
+    }
+
+    /* Claim the next free entry and make it the current function. */
+    func = _jitc->functions.ptr + index;
+    _jitc->functions.offset++;
+    _jitc->function = func;
+
+    func->self.size = stack_framesize;
+    func->self.alen = 0;
+    func->self.aoff = 0;
+    func->self.argf = 0;
+    func->self.argi = 0;
+    func->self.call = jit_call_default;
+
+    jit_alloc((jit_pointer_t *)&func->regoff,
+             _jitc->reglen * sizeof(jit_int32_t));
+
+    /* The node is created unlinked and linked explicitly right after;
+     * this must not be collapsed into a plain
+     * jit_new_node(jit_code_prolog) call.
+     */
+    func->prolog = jit_new_node_no_link(jit_code_prolog);
+    jit_link(func->prolog);
+    func->prolog->w.w = index;
+
+    /* Epilog node fields:
+     * u:      label value
+     * v:      offset in blocks vector
+     * w:      offset in functions vector
+     */
+    func->epilog = jit_new_node_no_link(jit_code_epilog);
+    func->epilog->w.w = index;
+
+    jit_regset_new(&func->regset);
+}
+
+/*
+ * Reserve `length` bytes in the current function's local area and return
+ * the resulting (updated) self.aoff offset.
+ *
+ * The running offset is first aligned down according to the request
+ * size: none for 0-1 bytes, 2 for 2 bytes, 4 for 3-4 bytes, 8 otherwise.
+ * Outside of the realize phase the allocation is also recorded as a
+ * synthesized `allocai` node.
+ */
+jit_int32_t
+_jit_allocai(jit_state_t *_jit, jit_int32_t length)
+{
+    jit_int32_t                 mask;
+
+    assert(_jitc->function);
+
+    /* Pick the alignment mask; -1 leaves the offset untouched. */
+    if (length == 0 || length == 1)
+       mask = -1;
+    else if (length == 2)
+       mask = -2;
+    else if (length == 3 || length == 4)
+       mask = -4;
+    else
+       mask = -8;
+
+    _jitc->function->self.aoff &= mask;
+    _jitc->function->self.aoff -= length;
+
+    if (!_jitc->realize) {
+       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
+       jit_dec_synth();
+    }
+
+    return (_jitc->function->self.aoff);
+}
+
+/*
+ * Emit code for a run-time sized stack allocation.
+ *
+ * u: register receiving the updated offset value
+ * v: register holding the requested size
+ *
+ * On first use in a function a 32-bit slot is reserved with
+ * jit_allocai() and its offset remembered in `aoffoff`.  The emitted
+ * sequence negates the size, masks it with -8, adds it to both the value
+ * loaded from that slot and to JIT_SP, then stores the new value back.
+ */
+void
+_jit_allocar(jit_state_t *_jit, jit_int32_t u, jit_int32_t v)
+{
+    jit_int32_t                 tmp;
+
+    assert(_jitc->function);
+    jit_inc_synth_ww(allocar, u, v);
+
+    /* Lazily create the slot that tracks the dynamic offset. */
+    if (!_jitc->function->allocar) {
+       _jitc->function->aoffoff = jit_allocai(sizeof(jit_int32_t));
+       _jitc->function->allocar = 1;
+    }
+
+    tmp = jit_get_reg(jit_class_gpr);
+
+    /* tmp = (-v) & -8 */
+    jit_negr(tmp, v);
+    jit_andi(tmp, tmp, -8);
+
+    /* u = saved offset + tmp; the stack pointer moves by the same amount */
+    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
+    jit_addr(u, u, tmp);
+    jit_addr(JIT_SP, JIT_SP, tmp);
+    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
+
+    jit_unget_reg(tmp);
+    jit_dec_synth();
+}
+
+/*
+ * Return from the current function: emit an unconditional jump and patch
+ * it to the function's epilog node.
+ */
+void
+_jit_ret(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+
+    jit_inc_synth(ret);
+    /* the jump target is the epilog of the function being built */
+    jit_patch_at(jit_jmpi(), _jitc->function->epilog);
+    jit_dec_synth();
+}
+
+/*
+ * Return the value held in register `u`: copy it into JIT_RET, mark
+ * JIT_RET live and jump to the epilog through jit_ret().  `code` is the
+ * opcode recorded for the synthesized node.
+ */
+void
+_jit_retr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
+{
+    jit_code_inc_synth_w(code, u);
+    jit_movr(JIT_RET, u);
+    jit_live(JIT_RET);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/*
+ * Return the immediate value `u`: load it into JIT_RET and jump to the
+ * epilog through jit_ret().  `code` is the opcode recorded for the
+ * synthesized node.
+ */
+void
+_jit_reti(jit_state_t *_jit, jit_word_t u, jit_code_t code)
+{
+    jit_code_inc_synth_w(code, u);
+    jit_movi(JIT_RET, u);
+    jit_ret();
+    jit_dec_synth();
+}
+
+/*
+ * Close the current function: link its (so far unlinked) epilog node
+ * into the node list and clear the current-function pointer.
+ */
+void
+_jit_epilog(jit_state_t *_jit)
+{
+    jit_node_t         *epilog;
+
+    assert(_jitc->function);
+
+    epilog = _jitc->function->epilog;
+    /* the epilog node must not have been linked yet */
+    assert(epilog->next == NULL);
+    jit_link(epilog);
+
+    _jitc->function = NULL;
+}
+
+/*
+ * Overwrite incoming argument `v` with the contents of register `u`.
+ *
+ * When v->u.w is in the register-passed range the value is moved to
+ * JIT_RA0 + v->u.w; otherwise v->u.w is used as a frame-pointer relative
+ * offset and the value is stored there.
+ */
+void
+_jit_putargr(jit_state_t *_jit, jit_int32_t u, jit_node_t *v, jit_code_t code)
+{
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
+
+    if (!jit_arg_reg_p(v->u.w)) {
+       /* stack-passed argument */
+       jit_stxi(v->u.w, JIT_FP, u);
+    }
+    else {
+       /* register-passed argument */
+       jit_movr(JIT_RA0 + v->u.w, u);
+    }
+
+    jit_dec_synth();
+}
+
+/*
+ * Overwrite incoming argument `v` with the immediate value `u`.
+ *
+ * A register-passed argument is loaded directly into JIT_RA0 + v->u.w.
+ * Otherwise the immediate goes through a temporary GPR and is stored at
+ * the frame-pointer relative offset v->u.w.
+ */
+void
+_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v, jit_code_t code)
+{
+    jit_int32_t                scratch;
+
+    assert_putarg_type(code, v->code);
+    jit_code_inc_synth_wp(code, u, v);
+
+    if (!jit_arg_reg_p(v->u.w)) {
+        /* stack-passed argument: materialise the constant, then store */
+        scratch = jit_get_reg(jit_class_gpr);
+        jit_movi(scratch, u);
+        jit_stxi(v->u.w, JIT_FP, scratch);
+        jit_unget_reg(scratch);
+    }
+    else {
+        jit_movi(JIT_RA0 + v->u.w, u);
+    }
+
+    jit_dec_synth();
+}
+
+/*
+ * Tell whether register `regno` holds one of the arguments described by
+ * `node`.
+ *
+ * Only registers flagged as argument registers in _rvs qualify.  The
+ * register is converted to an argument slot index and compared against
+ * node->v.w (GPR class) or node->w.w (FPR class) -- presumably the
+ * numbers of integer and floating-point argument registers in use,
+ * filled in by the call-site code outside this view.
+ *
+ * Returns 1 when the register is an in-use argument register, 0
+ * otherwise.
+ */
+jit_bool_t
+_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
+{
+    jit_int32_t                spec;
+
+    spec = jit_class(_rvs[regno].spec);
+    if (spec & jit_class_arg) {
+       if (spec & jit_class_gpr) {
+           /* Argument slot i lives in register JIT_RA0 + i (see
+            * _jit_putargr), so the slot index is regno - JIT_RA0.
+            * Adding the two could never satisfy the bound below. */
+           regno -= JIT_RA0;
+           if (regno >= 0 && regno < node->v.w)
+               return (1);
+       }
+       else if (spec & jit_class_fpr) {
+           /* NOTE(review): no _rvs entry currently combines the arg and
+            * fpr classes, so this branch is not reached; the register to
+            * slot mapping below is left as it was and should be verified
+            * before any FPR is flagged as an argument register. */
+           regno = JIT_FA0 + regno;
+           if (regno >= 0 && regno < node->w.w)
+               return (1);
+       }
+    }
+
+    return (0);
+}
+
+jit_pointer_t
+_emit_code(jit_state_t *_jit)
+{
+    jit_node_t         *node;
+    jit_node_t         *temp;
+    jit_word_t          word;
+    jit_word_t          value;
+    jit_int32_t                 offset;
+    struct {
+       jit_node_t      *node;
+       jit_uint8_t     *data;
+       jit_word_t       word;
+#if DEVEL_DISASSEMBLER
+       jit_word_t       prevw;
+#endif
+#if DISASSEMBLER
+       jit_int32_t      info_offset;
+#endif
+       jit_int32_t      const_offset;
+       jit_int32_t      patch_offset;
+    } undo;
+#if DEVEL_DISASSEMBLER
+    jit_word_t          prevw;
+#endif
+
+    _jitc->function = NULL;
+    _jitc->no_flag = 0;
+    _jitc->mode_d = SH_DEFAULT_FPU_MODE;
+    _jitc->uses_fpu = 0;
+
+    jit_reglive_setup();
+
+    _jitc->consts.data = NULL;
+    _jitc->consts.offset = _jitc->consts.length = 0;
+
+    undo.word = 0;
+    undo.node = NULL;
+    undo.data = NULL;
+#if DISASSEMBLER
+    undo.info_offset =
+#endif
+    undo.const_offset = undo.patch_offset = 0;
+#define case_rr(name, type)                                            \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w));            \
+               break
+#define case_rw(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), node->v.w);                \
+               break
+#define case_wr(name, type)                                            \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w));                \
+               break
+#define case_rrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrrr(name, type)                                          \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w),                            \
+                             rn(node->v.w), node->w.w);                \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_xrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w),                 \
+                             rn(node->w.w));                           \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
+#define case_rrrw(name, type)                                          \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
+                             rn(node->v.w), node->w.w);                \
+               break
+#define case_rqr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.q.l),           \
+                             rn(node->v.q.h), rn(node->w.w));          \
+           case jit_code_##name##i##type:                              \
+               break;
+#define case_rrf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               name##i_f(rn(node->u.w), rn(node->v.w), node->w.f);     \
+               break
+#define case_rrd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               name##i_d(rn(node->u.w), rn(node->v.w), node->w.d);     \
+               break
+#define case_wrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_brr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##r##type(temp->u.w, rn(node->v.w),             \
+                                 rn(node->w.w));                       \
+               else {                                                  \
+                   word = _jit->code.length                            \
+                       - (_jit->pc.uc - _jit->code.ptr);               \
+                   if (word < 4094) {                                  \
+                       word = name##r##type(0, rn(node->v.w),          \
+                                            rn(node->w.w));            \
+                   } else {                                            \
+                       word = name##r##type##_p(_jit->pc.w,            \
+                                                rn(node->v.w),         \
+                                                rn(node->w.w), 1);     \
+                   }                                                   \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i##type(temp->u.w,                            \
+                                 rn(node->v.w), node->w.w);            \
+               else {                                                  \
+                   word = _jit->code.length                            \
+                       - (_jit->pc.uc - _jit->code.ptr);               \
+                   if (word < 4094) {                                  \
+                       word = name##i##type(0, rn(node->v.w),          \
+                                            node->w.w);                \
+                   } else {                                            \
+                       word = name##i##type##_p(_jit->pc.w,            \
+                                                rn(node->v.w),         \
+                                                node->w.w, 1);         \
+                   }                                                   \
+                   patch(word, node);                                  \
+               }                                                       \
+               break;
+#define case_brf(name)                                                 \
+           case jit_code_##name##i_f:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_f(temp->u.w, rn(node->v.w), node->w.f);     \
+               else {                                                  \
+                   word = name##i_f_p(_jit->pc.w, rn(node->v.w),       \
+                                      node->w.f, 1);                   \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#define case_brd(name)                                                 \
+           case jit_code_##name##i_d:                                  \
+               temp = node->u.n;                                       \
+               assert(temp->code == jit_code_label ||                  \
+                      temp->code == jit_code_epilog);                  \
+               if (temp->flag & jit_flag_patch)                        \
+                   name##i_d(temp->u.w, rn(node->v.w), node->w.d);     \
+               else {                                                  \
+                   word = name##i_d_p(_jit->pc.w, rn(node->v.w),       \
+                                      node->w.d, 1);                   \
+                   patch(word, node);                                  \
+               }                                                       \
+               break
+#if DEVEL_DISASSEMBLER
+    prevw = _jit->pc.w;
+#endif
+    if (SH_HAS_FPU) {
+           for (node = _jitc->head; node && !_jitc->uses_fpu; node = node->next)
+                   _jitc->uses_fpu = jit_uses_fpu(node->code);
+    }
+
+    for (node = _jitc->head; node; node = node->next) {
+       if (_jit->pc.uc >= _jitc->code.end)
+           return (NULL);
+
+#if DEVEL_DISASSEMBLER
+       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
+       prevw = _jit->pc.w;
+#endif
+       value = jit_classify(node->code);
+       jit_regarg_set(node, value);
+       switch (node->code) {
+           case jit_code_align:
+               /* Must align to a power of two */
+               assert(!(node->u.w & (node->u.w - 1)));
+               if ((word = _jit->pc.w & (node->u.w - 1)))
+                   nop(node->u.w - word);
+               break;
+           case jit_code_skip:
+               nop((node->u.w + 3) & ~3);
+               break;
+           case jit_code_note:         case jit_code_name:
+               node->u.w = _jit->pc.w;
+               break;
+           case jit_code_label:
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               /* Reset FPU mode */
+               set_fmode_no_r0(_jit, SH_DEFAULT_FPU_MODE);
+               node->u.w = _jit->pc.w;
+               break;
+               case_rrr(add,);
+               case_rrw(add,);
+               case_rrr(addc,);
+               case_rrw(addc,);
+               case_rrr(addx,);
+               case_rrw(addx,);
+               case_rrr(sub,);
+               case_rrw(sub,);
+               case_rrr(subc,);
+               case_rrw(subc,);
+               case_rrr(subx,);
+               case_rrw(subx,);
+               case_rrw(rsb,);
+               case_rrr(mul,);
+               case_rrw(mul,);
+               case_rrr(hmul,);
+               case_rrw(hmul,);
+               case_rrr(hmul, _u);
+               case_rrw(hmul, _u);
+               case_rrrr(qmul,);
+               case_rrrw(qmul,);
+               case_rrrr(qmul, _u);
+               case_rrrw(qmul, _u);
+               case_rrr(div,);
+               case_rrw(div,);
+               case_rrr(div, _u);
+               case_rrw(div, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
+               case_rrr(rem,);
+               case_rrw(rem,);
+               case_rrr(rem, _u);
+               case_rrw(rem, _u);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrrr(qlsh,);
+               case_rrrw(qlsh,);
+               case_rrrr(qlsh, _u);
+               case_rrrw(qlsh, _u);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rrrr(qrsh,);
+               case_rrrw(qrsh,);
+               case_rrrr(qrsh, _u);
+               case_rrrw(qrsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rr(clo,);
+               case_rr(clz,);
+               case_rr(cto,);
+               case_rr(ctz,);
+               case_rr(rbit,);
+               case_rr(popcnt,);
+               case_rrr(lrot,);
+               case_rrw(lrot,);
+               case_rrr(rrot,);
+               case_rrw(rrot,);
+               case_rrr(and,);
+               case_rrw(and,);
+               case_rrr(or,);
+               case_rrw(or,);
+               case_rrr(xor,);
+               case_rrw(xor,);
+               case_rr(trunc, _f_i);
+               case_rr(trunc, _d_i);
+               case_rr(ld, _c);
+               case_rw(ld, _c);
+               case_rr(ld, _uc);
+               case_rw(ld, _uc);
+               case_rr(ld, _s);
+               case_rw(ld, _s);
+               case_rr(ld, _us);
+               case_rw(ld, _us);
+               case_rr(ld, _i);
+               case_rw(ld, _i);
+               case_rrr(ldx, _c);
+               case_rrw(ldx, _c);
+               case_rrr(ldx, _uc);
+               case_rrw(ldx, _uc);
+               case_rrr(ldx, _s);
+               case_rrw(ldx, _s);
+               case_rrr(ldx, _us);
+               case_rrw(ldx, _us);
+               case_rrr(ldx, _i);
+               case_rrw(ldx, _i);
+           case jit_code_unldr:
+               unldr(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unldi:
+               unldi(rn(node->u.w), node->v.w, node->w.w);
+               break;
+           case jit_code_unldr_u:
+               unldr_u(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unldi_u:
+               unldi_u(rn(node->u.w), node->v.w, node->w.w);
+               break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
+               case_rr(st, _c);
+               case_wr(st, _c);
+               case_rr(st, _s);
+               case_wr(st, _s);
+               case_rr(st, _i);
+               case_wr(st, _i);
+               case_rrr(stx, _c);
+               case_wrr(stx, _c);
+               case_rrr(stx, _s);
+               case_wrr(stx, _s);
+               case_rrr(stx, _i);
+               case_wrr(stx, _i);
+           case jit_code_unstr:
+               unstr(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unsti:
+               unsti(node->u.w, rn(node->v.w), node->w.w);
+               break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
+               case_rr(hton, _us);
+               case_rr(hton, _ui);
+               case_rr(bswap, _us);
+               case_rr(bswap, _ui);
+           case jit_code_extr:
+               extr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_extr_u:
+               extr_u(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_depr:
+               depr(rn(node->u.w), rn(node->v.w), node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_depi:
+               depi(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
+               break;
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rrr(movn,);
+               case_rrr(movz,);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), temp->u.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
+               case_brr(blt,);
+               case_brw(blt,);
+               case_brr(blt, _u);
+               case_brw(blt, _u);
+               case_brr(ble,);
+               case_brw(ble,);
+               case_brr(ble, _u);
+               case_brw(ble, _u);
+               case_brr(beq,);
+               case_brw(beq,);
+               case_brr(bge,);
+               case_brw(bge,);
+               case_brr(bge, _u);
+               case_brw(bge, _u);
+               case_brr(bgt,);
+               case_brw(bgt,);
+               case_brr(bgt, _u);
+               case_brw(bgt, _u);
+               case_brr(bne,);
+               case_brw(bne,);
+               case_brr(boadd,);
+               case_brw(boadd,);
+               case_brr(boadd, _u);
+               case_brw(boadd, _u);
+               case_brr(bxadd,);
+               case_brw(bxadd,);
+               case_brr(bxadd, _u);
+               case_brw(bxadd, _u);
+               case_brr(bosub,);
+               case_brw(bosub,);
+               case_brr(bosub, _u);
+               case_brw(bosub, _u);
+               case_brr(bxsub,);
+               case_brw(bxsub,);
+               case_brr(bxsub, _u);
+               case_brw(bxsub, _u);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
+               case_rrr(add, _f);
+               case_rrf(add);
+               case_rrr(sub, _f);
+               case_rrf(sub);
+               case_rrf(rsb);
+               case_rrr(mul, _f);
+               case_rrf(mul);
+               case_rrr(div, _f);
+               case_rrf(div);
+               case_rr(abs, _f);
+               case_rr(neg, _f);
+               case_rr(sqrt, _f);
+               case_rqr(fma, _f);
+               case_rqr(fms, _f);
+               case_rqr(fnma, _f);
+               case_rqr(fnms, _f);
+               case_rr(ext, _f);
+               case_rr(ld, _f);
+               case_rw(ld, _f);
+               case_rrr(ldx, _f);
+               case_rrw(ldx, _f);
+           case jit_code_unldr_x:
+               unldr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unldi_x:
+               unldi_x(rn(node->u.w), node->v.w, node->w.w);
+               break;
+               case_rr(st, _f);
+               case_wr(st, _f);
+               case_rrr(stx, _f);
+               case_wrr(stx, _f);
+           case jit_code_unstr_x:
+               unstr_x(rn(node->u.w), rn(node->v.w), node->w.w);
+               break;
+           case jit_code_unsti_x:
+               unsti_x(node->u.w, rn(node->v.w), node->w.w);
+               break;
+               case_rr(mov, _f);
+           case jit_code_movi_f:
+               movi_f(rn(node->u.w), node->v.f);
+               break;
+               case_rr(ext, _d_f);
+               case_rrr(lt, _f);
+               case_rrf(lt);
+               case_rrr(le, _f);
+               case_rrf(le);
+               case_rrr(eq, _f);
+               case_rrf(eq);
+               case_rrr(ge, _f);
+               case_rrf(ge);
+               case_rrr(gt, _f);
+               case_rrf(gt);
+               case_rrr(ne, _f);
+               case_rrf(ne);
+               case_rrr(unlt, _f);
+               case_rrf(unlt);
+               case_rrr(unle, _f);
+               case_rrf(unle);
+               case_rrr(uneq, _f);
+               case_rrf(uneq);
+               case_rrr(unge, _f);
+               case_rrf(unge);
+               case_rrr(ungt, _f);
+               case_rrf(ungt);
+               case_rrr(ltgt, _f);
+               case_rrf(ltgt);
+               case_rrr(ord, _f);
+               case_rrf(ord);
+               case_rrr(unord, _f);
+               case_rrf(unord);
+               case_brr(blt, _f);
+               case_brf(blt);
+               case_brr(ble, _f);
+               case_brf(ble);
+               case_brr(beq, _f);
+               case_brf(beq);
+               case_brr(bge, _f);
+               case_brf(bge);
+               case_brr(bgt, _f);
+               case_brf(bgt);
+               case_brr(bne, _f);
+               case_brf(bne);
+               case_brr(bunlt, _f);
+               case_brf(bunlt);
+               case_brr(bunle, _f);
+               case_brf(bunle);
+               case_brr(buneq, _f);
+               case_brf(buneq);
+               case_brr(bunge, _f);
+               case_brf(bunge);
+               case_brr(bungt, _f);
+               case_brf(bungt);
+               case_brr(bltgt, _f);
+               case_brf(bltgt);
+               case_brr(bord, _f);
+               case_brf(bord);
+               case_brr(bunord, _f);
+               case_brf(bunord);
+               case_rrr(add, _d);
+               case_rrd(add);
+               case_rrr(sub, _d);
+               case_rrd(sub);
+               case_rrd(rsb);
+               case_rrr(mul, _d);
+               case_rrd(mul);
+               case_rrr(div, _d);
+               case_rrd(div);
+               case_rr(abs, _d);
+               case_rr(neg, _d);
+               case_rr(sqrt, _d);
+               case_rqr(fma, _d);
+               case_rqr(fms, _d);
+               case_rqr(fnma, _d);
+               case_rqr(fnms, _d);
+               case_rr(ext, _d);
+               case_rr(ld, _d);
+               case_rw(ld, _d);
+               case_rrr(ldx, _d);
+               case_rrw(ldx, _d);
+               case_rr(st, _d);
+               case_wr(st, _d);
+               case_rrr(stx, _d);
+               case_wrr(stx, _d);
+               case_rr(mov, _d);
+           case jit_code_movi_d:
+               movi_d(rn(node->u.w), node->v.d);
+               break;
+               case_rr(ext, _f_d);
+               case_rrr(lt, _d);
+               case_rrd(lt);
+               case_rrr(le, _d);
+               case_rrd(le);
+               case_rrr(eq, _d);
+               case_rrd(eq);
+               case_rrr(ge, _d);
+               case_rrd(ge);
+               case_rrr(gt, _d);
+               case_rrd(gt);
+               case_rrr(ne, _d);
+               case_rrd(ne);
+               case_rrr(unlt, _d);
+               case_rrd(unlt);
+               case_rrr(unle, _d);
+               case_rrd(unle);
+               case_rrr(uneq, _d);
+               case_rrd(uneq);
+               case_rrr(unge, _d);
+               case_rrd(unge);
+               case_rrr(ungt, _d);
+               case_rrd(ungt);
+               case_rrr(ltgt, _d);
+               case_rrd(ltgt);
+               case_rrr(ord, _d);
+               case_rrd(ord);
+               case_rrr(unord, _d);
+               case_rrd(unord);
+               case_brr(blt, _d);
+               case_brd(blt);
+               case_brr(ble, _d);
+               case_brd(ble);
+               case_brr(beq, _d);
+               case_brd(beq);
+               case_brr(bge, _d);
+               case_brd(bge);
+               case_brr(bgt, _d);
+               case_brd(bgt);
+               case_brr(bne, _d);
+               case_brd(bne);
+               case_brr(bunlt, _d);
+               case_brd(bunlt);
+               case_brr(bunle, _d);
+               case_brd(bunle);
+               case_brr(buneq, _d);
+               case_brd(buneq);
+               case_brr(bunge, _d);
+               case_brd(bunge);
+               case_brr(bungt, _d);
+               case_brd(bungt);
+               case_brr(bltgt, _d);
+               case_brd(bltgt);
+               case_brr(bord, _d);
+               case_brd(bord);
+               case_brr(bunord, _d);
+               case_brd(bunord);
+           case jit_code_jmpr:
+               jmpr(rn(node->u.w));
+               flush_consts(0);
+               break;
+           case jit_code_jmpi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       jmpi(temp->u.w);
+                   else {
+                       word = jmpi_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   jmpi(node->u.w);
+               flush_consts(0);
+               break;
+           case jit_code_callr:
+               callr(rn(node->u.w));
+               break;
+           case jit_code_calli:
+               if (node->flag & jit_flag_node) {
+                   temp = node->u.n;
+                   assert(temp->code == jit_code_label ||
+                          temp->code == jit_code_epilog);
+                   if (temp->flag & jit_flag_patch)
+                       calli(temp->u.w);
+                   else {
+                       word = calli_p(_jit->pc.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   calli(node->u.w);
+               break;
+           case jit_code_prolog:
+               _jitc->function = _jitc->functions.ptr + node->w.w;
+               undo.node = node;
+               undo.word = _jit->pc.w;
+#if DEVEL_DISASSEMBLER
+               undo.prevw = prevw;
+#endif
+               undo.data = _jitc->consts.data;
+               undo.const_offset = _jitc->consts.offset;
+               undo.patch_offset = _jitc->patches.offset;
+#if DISASSEMBLER
+               if (_jitc->data_info.ptr)
+                   undo.info_offset = _jitc->data_info.offset;
+#endif
+           restart_function:
+               _jitc->again = 0;
+               prolog(node);
+               break;
+           case jit_code_epilog:
+               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
+               if (_jitc->again) {
+                   for (temp = undo.node->next;
+                        temp != node; temp = temp->next) {
+                       if (temp->code == jit_code_label ||
+                           temp->code == jit_code_epilog)
+                           temp->flag &= ~jit_flag_patch;
+                   }
+                   temp->flag &= ~jit_flag_patch;
+                   node = undo.node;
+                   _jit->pc.w = undo.word;
+#if DEVEL_DISASSEMBLER
+                   prevw = undo.prevw;
+#endif
+                   invalidate_consts();
+                   _jitc->consts.data = undo.data;
+                   _jitc->consts.offset = undo.const_offset;
+                   _jitc->patches.offset = undo.patch_offset;
+#if DISASSEMBLER
+                   if (_jitc->data_info.ptr)
+                       _jitc->data_info.offset = undo.info_offset;
+#endif
+                   goto restart_function;
+               }
+               /* remember label is defined */
+               node->flag |= jit_flag_patch;
+               node->u.w = _jit->pc.w;
+               epilog(node);
+               _jitc->function = NULL;
+               flush_consts(0);
+               break;
+           case jit_code_movr_w_f:
+               movr_w_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movr_f_w:
+               movr_f_w(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_movi_f_w:
+               movi_f_w(rn(node->u.w), node->v.f);
+               break;
+           case jit_code_movi_w_f:
+               movi_w_f(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_movr_ww_d:
+               movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_movr_d_ww:
+               movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_movi_d_ww:
+               movi_d_ww(rn(node->u.w), rn(node->v.w), node->w.d);
+               break;
+           case jit_code_movi_ww_d:
+               movi_ww_d(rn(node->u.w), node->v.w, node->w.w);
+               break;
+           case jit_code_va_start:
+               vastart(rn(node->u.w));
+               break;
+           case jit_code_va_arg:
+               vaarg(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_va_arg_d:
+               vaarg_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_live:                 case jit_code_ellipsis:
+           case jit_code_va_push:
+           case jit_code_allocai:              case jit_code_allocar:
+           case jit_code_arg_c:                case jit_code_arg_s:
+           case jit_code_arg_i:
+           case jit_code_arg_f:                case jit_code_arg_d:
+           case jit_code_va_end:
+           case jit_code_ret:
+           case jit_code_retr_c:               case jit_code_reti_c:
+           case jit_code_retr_uc:              case jit_code_reti_uc:
+           case jit_code_retr_s:               case jit_code_reti_s:
+           case jit_code_retr_us:              case jit_code_reti_us:
+           case jit_code_retr_i:               case jit_code_reti_i:
+           case jit_code_retr_f:               case jit_code_reti_f:
+           case jit_code_retr_d:               case jit_code_reti_d:
+           case jit_code_getarg_c:             case jit_code_getarg_uc:
+           case jit_code_getarg_s:             case jit_code_getarg_us:
+           case jit_code_getarg_i:             case jit_code_getarg_ui:
+           case jit_code_getarg_l:
+           case jit_code_getarg_f:             case jit_code_getarg_d:
+           case jit_code_putargr_c:            case jit_code_putargi_c:
+           case jit_code_putargr_uc:           case jit_code_putargi_uc:
+           case jit_code_putargr_s:            case jit_code_putargi_s:
+           case jit_code_putargr_us:           case jit_code_putargi_us:
+           case jit_code_putargr_i:            case jit_code_putargi_i:
+           case jit_code_putargr_f:            case jit_code_putargi_f:
+           case jit_code_putargr_d:            case jit_code_putargi_d:
+           case jit_code_pushargr_c:           case jit_code_pushargi_c:
+           case jit_code_pushargr_uc:          case jit_code_pushargi_uc:
+           case jit_code_pushargr_s:           case jit_code_pushargi_s:
+           case jit_code_pushargr_us:          case jit_code_pushargi_us:
+           case jit_code_pushargr_i:           case jit_code_pushargi_i:
+           case jit_code_pushargr_f:           case jit_code_pushargi_f:
+           case jit_code_pushargr_d:           case jit_code_pushargi_d:
+           case jit_code_retval_c:             case jit_code_retval_uc:
+           case jit_code_retval_s:             case jit_code_retval_us:
+           case jit_code_retval_i:
+           case jit_code_retval_f:             case jit_code_retval_d:
+           case jit_code_prepare:
+           case jit_code_finishr:              case jit_code_finishi:
+               break;
+           case jit_code_casr:
+               casr(rn(node->u.w), rn(node->v.w),
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_casi:
+               casi(rn(node->u.w), node->v.w,
+                    rn(node->w.q.l), rn(node->w.q.h));
+               break;
+           case jit_code_negi_f:               case jit_code_absi_f:
+           case jit_code_sqrti_f:              case jit_code_negi_d:
+           case jit_code_absi_d:               case jit_code_sqrti_d:
+               break;
+           case jit_code_negi:
+               negi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_comi:
+               comi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_c:
+               exti_c(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_uc:
+               exti_uc(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_s:
+               exti_s(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti_us:
+               exti_us(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_bswapi_us:
+               bswapi_us(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_bswapi_ui:
+               bswapi_ui(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_htoni_us:
+               htoni_us(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_htoni_ui:
+               htoni_ui(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_cloi:
+               cloi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_clzi:
+               clzi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_ctoi:
+               ctoi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_ctzi:
+               ctzi(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_rbiti:
+               rbiti(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_popcnti:
+               popcnti(rn(node->u.w), node->v.w);
+               break;
+           case jit_code_exti:
+               exti(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
+               break;
+           case jit_code_exti_u:
+               exti_u(rn(node->u.w), node->v.w, node->w.q.l, node->w.q.h);
+               break;
+           default:
+               printf("ABORT MISSION (%i)\n", node->code);
+               abort();
+       }
+       jit_regarg_clr(node, value);
+       assert(_jitc->regarg == 0);
+       assert(_jitc->synth == 0);
+       /* update register live state */
+       jit_reglive(node);
+
+        _jitc->no_flag = !(node->flag & jit_flag_patch);
+
+       if (_jitc->consts.length &&
+               (jit_uword_t)_jit->pc.uc - (jit_uword_t)_jitc->consts.patches[0] >= 900) {
+               /* Maximum displacement for mov.l is +1020 bytes. If we're already +900 bytes
+                * since the first mov.l, force a flush. */
+
+               if (node->next &&
+                       node->next->code != jit_code_jmpi &&
+                       node->next->code != jit_code_jmpr &&
+                       node->next->code != jit_code_epilog) {
+                       /* insert a jump, flush constants and continue */
+                       word = _jit->pc.w;
+                       BRA(0);
+                       NOP();
+                       flush_consts(1);
+                       patch_at(word, _jit->pc.w);
+               }
+       }
+    }
+#undef case_brw
+#undef case_brr
+#undef case_wrr
+#undef case_rrw
+#undef case_rrr
+#undef case_wr
+#undef case_rw
+#undef case_rr
+
+    flush_consts(1);
+
+    for (offset = 0; offset < _jitc->patches.offset; offset++) {
+       node = _jitc->patches.ptr[offset].node;
+       word = _jitc->patches.ptr[offset].inst;
+       value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
+       patch_at(word, value);
+    }
+
+    jit_flush(_jit->code.ptr, _jit->pc.uc);
+
+    return (_jit->code.ptr);
+}
+
+#define CODE 1
+#  include "jit_rewind.c"
+#  include "jit_sh-cpu.c"
+#  include "jit_sh-fpu.c"
+#  include "jit_fallback.c"
+#undef CODE
+
+/*
+ * Flush the instruction cache for every page touched by the address
+ * range that starts at fptr and ends at tptr.
+ * Only does work when built for Linux; otherwise the body is empty.
+ */
+void
+jit_flush(void *fptr, void *tptr)
+{
+#if defined(__linux__)
+    jit_uword_t                page, first, limit, page_size;
+
+    page_size = sysconf(_SC_PAGE_SIZE);
+    /* round the start down and the end up to a page boundary */
+    first = (jit_uword_t)fptr & -page_size;
+    limit = (((jit_uword_t)tptr) + page_size - 1) & -page_size;
+    page = first;
+    while (page < limit) {
+       __clear_cache((void *)page, (void *)(page + page_size));
+       page += page_size;
+    }
+#endif
+}
+
+/* Thin wrapper that forwards to ldxi(): r0 and r1 are translated with
+ * rn(), the displacement i0 is passed through unchanged. */
+void
+_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    ldxi(rn(r0), rn(r1), i0);
+}
+
+/* Thin wrapper that forwards to stxi(): the displacement i0 is passed
+ * through unchanged, r0 and r1 are translated with rn(). */
+void
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    stxi(i0, rn(r0), rn(r1));
+}
+
+/* Presumably the double-precision counterpart of _emit_ldxi(); it
+ * deliberately emits nothing and ignores all of its arguments. */
+void
+_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+       /* No FPU support */
+}
+
+/* Presumably the double-precision counterpart of _emit_stxi(); it
+ * deliberately emits nothing and ignores all of its arguments. */
+void
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+       /* No FPU support */
+}
+
+static void
+_load_const(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t          w;
+    jit_word_t          d;
+    jit_word_t          base;
+    jit_int32_t                *data;
+    jit_int32_t                 size;
+    jit_int32_t                 offset;
+
+    _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w;
+    /* positive forward offset */
+    LDPL(r0, 0);
+
+    if (!uniq) {
+       /* search already requested values */
+       for (offset = 0; offset < _jitc->consts.length; offset++) {
+           if (_jitc->consts.values[offset] == i0) {
+               _jitc->consts.patches[_jitc->consts.offset++] = offset;
+               return;
+           }
+       }
+    }
+
+#if DEBUG
+    /* cannot run out of space because of limited range
+     * but assert anyway to catch logic errors */
+    assert(_jitc->consts.length < 1024);
+    assert(_jitc->consts.offset < 2048);
+#endif
+    _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length;
+    _jitc->consts.values[_jitc->consts.length++] = i0;
+}
+
+static void
+_load_const_f(jit_state_t *_jit, jit_bool_t uniq, jit_int32_t r0, jit_float32_t f0)
+{
+    jit_word_t          w;
+    jit_word_t          d;
+    jit_word_t          base;
+    jit_int32_t                *data;
+    jit_int32_t                 size;
+    jit_int32_t                 offset;
+    union fl32 {
+           jit_int32_t i;
+           jit_float32_t f;
+    };
+    jit_uint32_t i0 = ((union fl32)f0).i;
+
+    _jitc->consts.patches[_jitc->consts.offset++] = _jit->pc.w;
+    /* positive forward offset */
+    MOVA(0);
+    LDF(r0, _R0);
+
+    if (!uniq) {
+       /* search already requested values */
+       for (offset = 0; offset < _jitc->consts.length; offset++) {
+           if (_jitc->consts.values[offset] == i0) {
+               _jitc->consts.patches[_jitc->consts.offset++] = offset;
+               return;
+           }
+       }
+    }
+
+#if DEBUG
+    /* cannot run out of space because of limited range
+     * but assert anyway to catch logic errors */
+    assert(_jitc->consts.length < 1024);
+    assert(_jitc->consts.offset < 2048);
+#endif
+    _jitc->consts.patches[_jitc->consts.offset++] = _jitc->consts.length;
+    _jitc->consts.values[_jitc->consts.length++] = i0;
+}
+
+/*
+ * Write the queued constants into the code stream at the current
+ * position and patch every pending load so that it refers to its value.
+ * Does nothing when no constant is queued.
+ *
+ * force: when zero, the flush is skipped if `word' (bytes left in the
+ *        code buffer minus twice the number of queued constants) is
+ *        below 1024.
+ *
+ * NOTE(review): the `<< 1' in that computation differs from the `<< 2'
+ * used for the pool size further down, and skipping the flush when
+ * little room is left is surprising -- confirm the intent.
+ */
+static void
+_flush_consts(jit_state_t *_jit, jit_bool_t force)
+{
+    jit_word_t          word;
+    jit_int32_t                 offset;
+
+    /* if no forward constants */
+    if (!_jitc->consts.length)
+       return;
+
+    word = _jit->code.length - (_jit->pc.uc - _jit->code.ptr)
+           - (_jitc->consts.length << 1);
+    if (!force && word < 1024)
+       return;
+
+    /* Align to 32 bits */
+    if (_jit->pc.w & 0x3)
+           NOP();
+
+    /* from here on `word' is the address of the first pool entry */
+    word = _jit->pc.w;
+    _jitc->consts.data = _jit->pc.uc;
+    /* four bytes per queued constant */
+    _jitc->consts.size = _jitc->consts.length << 2;
+    /* FIXME check will not overrun, otherwise, need to reallocate
+     * code buffer and start over */
+    jit_memcpy(_jitc->consts.data, _jitc->consts.values, _jitc->consts.size);
+    _jit->pc.w += _jitc->consts.size;
+
+#if DISASSEMBLER
+    /* record the pool's address and length in data_info, growing the
+     * table by 1024 entries when it is full */
+    if (_jitc->data_info.ptr) {
+       if (_jitc->data_info.offset >= _jitc->data_info.length) {
+           jit_realloc((jit_pointer_t *)&_jitc->data_info.ptr,
+                       _jitc->data_info.length * sizeof(jit_data_info_t),
+                       (_jitc->data_info.length + 1024) *
+                       sizeof(jit_data_info_t));
+           _jitc->data_info.length += 1024;
+       }
+       _jitc->data_info.ptr[_jitc->data_info.offset].code = word;
+       _jitc->data_info.ptr[_jitc->data_info.offset].length = _jitc->consts.size;
+       ++_jitc->data_info.offset;
+    }
+#endif
+
+    /* patches holds (instruction address, value index) pairs */
+    for (offset = 0; offset < _jitc->consts.offset; offset += 2)
+       patch_at(_jitc->consts.patches[offset],
+                word + (_jitc->consts.patches[offset + 1] << 2));
+    /* the queue is empty again */
+    _jitc->consts.length = _jitc->consts.offset = 0;
+}
+
+/* to be called if needing to start over a function */
+static void
+_invalidate_consts(jit_state_t *_jit)
+{
+    /* if any forward constants are queued, discard them together with
+     * their pending patch entries */
+    if (_jitc->consts.length)
+       _jitc->consts.length = _jitc->consts.offset = 0;
+}
+
+/*
+ * Queue the instruction at address instr for later patching against the
+ * node that `node' refers to (node->v.n for a movi, node->u.n for
+ * anything else).  The patch table grows by 1024 entries when full.
+ */
+static void
+_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
+{
+    jit_int32_t                 target_flag;
+
+    assert(node->flag & jit_flag_node);
+    target_flag = (node->code == jit_code_movi ? node->v.n : node->u.n)->flag;
+    /* the referenced node must not have been resolved yet */
+    assert(!(target_flag & jit_flag_patch));
+
+    if (_jitc->patches.offset >= _jitc->patches.length) {
+       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
+                   _jitc->patches.length * sizeof(jit_patch_t),
+                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
+       _jitc->patches.length += 1024;
+    }
+
+    _jitc->patches.ptr[_jitc->patches.offset].node = node;
+    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
+    _jitc->patches.offset += 1;
+}
+
+/*
+ * Assign a location to the next integer argument of the function being
+ * defined and describe it in a node.
+ *
+ * node: node to reuse (it gets linked), or null to create one with `code'.
+ * The node's u.w receives the argument register index, or the stack
+ * offset once the argument registers are used up; v.w receives the
+ * 1-based argument number.
+ */
+static jit_node_t *
+_jit_make_arg(jit_state_t *_jit, jit_node_t *node, jit_code_t code)
+{
+    jit_int32_t                 where;
+
+    if (jit_arg_reg_p(_jitc->function->self.argi)) {
+       where = _jitc->function->self.argi;
+       _jitc->function->self.argi += 1;
+    }
+    else {
+       /* no argument register left: take the next stack slot */
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += STACK_SLOT;
+    }
+
+    if (node) {
+       link_node(node);
+    }
+    else {
+       node = jit_new_node(code);
+    }
+
+    node->u.w = where;
+    _jitc->function->self.argn += 1;
+    node->v.w = _jitc->function->self.argn;
+    jit_link_prolog();
+
+    return (node);
+}
+
+/*
+ * Assign a location to the next single-precision argument of the
+ * function being defined and describe it in a node.
+ *
+ * node: node to reuse (it gets linked), or null to create a
+ *       jit_code_arg_f node.
+ * The node's u.w receives the float argument register index, or the
+ * stack offset once those registers are used up; v.w receives the
+ * 1-based argument number.
+ */
+static jit_node_t *
+_jit_make_arg_f(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 where;
+
+    if (jit_arg_f_reg_p(_jitc->function->self.argf)) {
+       where = _jitc->function->self.argf;
+       _jitc->function->self.argf += 1;
+    }
+    else {
+       /* no float argument register left: take the next stack slot */
+       where = _jitc->function->self.size;
+       _jitc->function->self.size += STACK_SLOT;
+    }
+
+    if (node) {
+       link_node(node);
+    }
+    else {
+       node = jit_new_node(jit_code_arg_f);
+    }
+
+    node->u.w = where;
+    _jitc->function->self.argn += 1;
+    node->v.w = _jitc->function->self.argn;
+    jit_link_prolog();
+
+    return (node);
+}
+
+/*
+ * Assign a location to the next double-precision argument of the
+ * function being defined and describe it in a node.
+ *
+ * node: node to reuse (it gets linked), or null to create a
+ *       jit_code_arg_d node.
+ * The node's u.w receives the (even) float argument register index, or
+ * the stack offset when no register pair is available; v.w receives the
+ * 1-based argument number.
+ */
+static jit_node_t *
+_jit_make_arg_d(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                 offset;
+
+    /* A double starts on an even float register index.  Round up first
+     * and only then test that the register exists, exactly as
+     * _jit_pushargr_d()/_jit_pushargi_d() do on the caller side;
+     * testing the unrounded index could select a register past the
+     * last argument register while the caller used the stack. */
+    offset = (_jitc->function->self.argf + 1) & ~1;
+    if (jit_arg_f_reg_p(offset)) {
+       _jitc->function->self.argf = offset + 2;
+    }
+    else {
+       /* a double takes two stack slots */
+       offset = _jitc->function->self.size;
+       _jitc->function->self.size += STACK_SLOT * 2;
+    }
+    if (node == (jit_node_t *)0)
+       node = jit_new_node(jit_code_arg_d);
+    else
+       link_node(node);
+    node->u.w = offset;
+    node->v.w = ++_jitc->function->self.argn;
+    jit_link_prolog();
+    return (node);
+}
+
+/* Declare the next integer argument of the function being defined and
+ * return its node, created with the given code.  Must be called inside a
+ * function that has not been marked varargs. */
+jit_node_t *
+_jit_arg(jit_state_t *_jit, jit_code_t code)
+{
+    assert(_jitc->function);
+    assert(!(_jitc->function->self.call & jit_call_varargs));
+    return (jit_make_arg((jit_node_t*)0, code));
+}
+
+/* Fetch argument v into register u as a signed char: from register
+ * JIT_RA0 + index with jit_extr_c when the argument was passed in a
+ * register, otherwise with jit_ldxi_c from JIT_FP + offset + C_DISP. */
+void
+_jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert_arg_type(v->code, jit_code_arg_c);
+    jit_inc_synth_wp(getarg_c, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_c(u, JIT_RA0 + v->u.w);
+    else
+       jit_ldxi_c(u, JIT_FP, v->u.w + C_DISP);
+    jit_dec_synth();
+}
+
+/* Fetch argument v into register u as an unsigned char: from register
+ * JIT_RA0 + index with jit_extr_uc when the argument was passed in a
+ * register, otherwise with jit_ldxi_uc from JIT_FP + offset + C_DISP.
+ * The argument type is checked against jit_code_arg_c, as in the signed
+ * variant. */
+void
+_jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert_arg_type(v->code, jit_code_arg_c);
+    jit_inc_synth_wp(getarg_uc, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_uc(u, JIT_RA0 + v->u.w);
+    else
+       jit_ldxi_uc(u, JIT_FP, v->u.w + C_DISP);
+    jit_dec_synth();
+}
+
+/* Fetch argument v into register u as a signed short: from register
+ * JIT_RA0 + index with jit_extr_s when the argument was passed in a
+ * register, otherwise with jit_ldxi_s from JIT_FP + offset + S_DISP. */
+void
+_jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert_arg_type(v->code, jit_code_arg_s);
+    jit_inc_synth_wp(getarg_s, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_s(u, JIT_RA0 + v->u.w);
+    else
+       jit_ldxi_s(u, JIT_FP, v->u.w + S_DISP);
+    jit_dec_synth();
+}
+
+/* Fetch argument v into register u as an unsigned short: from register
+ * JIT_RA0 + index with jit_extr_us when the argument was passed in a
+ * register, otherwise with jit_ldxi_us from JIT_FP + offset + S_DISP.
+ * The argument type is checked against jit_code_arg_s, as in the signed
+ * variant. */
+void
+_jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert_arg_type(v->code, jit_code_arg_s);
+    jit_inc_synth_wp(getarg_us, u, v);
+    if (jit_arg_reg_p(v->u.w))
+       jit_extr_us(u, JIT_RA0 + v->u.w);
+    else
+       jit_ldxi_us(u, JIT_FP, v->u.w + S_DISP);
+    jit_dec_synth();
+}
+
+/* Fetch argument v into register u as an int: a plain jit_movr from
+ * register JIT_RA0 + index when the argument was passed in a register,
+ * otherwise jit_ldxi_i from JIT_FP + offset + I_DISP. */
+void
+_jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert_arg_type(v->code, jit_code_arg_i);
+    jit_inc_synth_wp(getarg_i, u, v);
+    if (jit_arg_reg_p(v->u.w)) {
+       jit_movr(u, JIT_RA0 + v->u.w);
+    }
+    else
+       jit_ldxi_i(u, JIT_FP, v->u.w + I_DISP);
+    jit_dec_synth();
+}
+
+/* Pass register u as the next integer argument of the call being
+ * prepared.  It goes into JIT_RA0 + call.argi while argument registers
+ * remain; afterwards it is stored at JIT_SP + call.size, which then
+ * grows by STACK_SLOT. */
+void
+_jit_pushargr(jit_state_t *_jit, jit_int32_t u, jit_code_t code)
+{
+    jit_code_inc_synth_w(code, u);
+    jit_link_prepare();
+    assert(_jitc->function);
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movr(JIT_RA0 + _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       jit_stxi(_jitc->function->call.size, JIT_SP, u);
+       _jitc->function->call.size += STACK_SLOT;
+    }
+    jit_dec_synth();
+}
+
+/* Pass the immediate u as the next integer argument of the call being
+ * prepared.  It is loaded straight into JIT_RA0 + call.argi while
+ * argument registers remain; afterwards it goes through a temporary
+ * register and is stored at JIT_SP + call.size, which then grows by
+ * STACK_SLOT. */
+void
+_jit_pushargi(jit_state_t *_jit, jit_word_t u, jit_code_t code)
+{
+    jit_int32_t                regno;
+    assert(_jitc->function);
+    jit_code_inc_synth_w(code, u);
+    jit_link_prepare();
+    if (jit_arg_reg_p(_jitc->function->call.argi)) {
+       jit_movi(JIT_RA0 + _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
+       /* no store-immediate here: borrow a register for the value */
+       regno = jit_get_reg(jit_class_gpr);
+       jit_movi(regno, u);
+       jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+       _jitc->function->call.size += STACK_SLOT;
+       jit_unget_reg(regno);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Finish the call being prepared, calling through the address held in
+ * register r0.  Records on the call node how many integer (v.w) and
+ * float (w.w) argument registers were filled, then resets the per-call
+ * state.
+ */
+void
+_jit_finishr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_node_t         *call;
+    assert(_jitc->function);
+    jit_inc_synth_w(finishr, r0);
+    /* keep track of the largest outgoing stack argument area */
+    if (_jitc->function->self.alen < _jitc->function->call.size)
+       _jitc->function->self.alen = _jitc->function->call.size;
+    call = jit_callr(r0);
+    /* use the counters of the call being built (call.*), as
+     * _jit_finishi() does, not those of the enclosing function's own
+     * parameters (self.*) */
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
+    _jitc->function->call.argi = _jitc->function->call.argf =
+       _jitc->function->call.size = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+}
+
+/*
+ * Finish the call being prepared, calling the absolute address i0.
+ * Records on the call node how many integer (v.w) and float (w.w)
+ * argument registers were filled, resets the per-call state and returns
+ * the call node.
+ */
+jit_node_t *
+_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
+{
+    jit_node_t         *node;
+
+    assert(_jitc->function);
+    jit_inc_synth_w(finishi, (jit_word_t)i0);
+    /* keep track of the largest outgoing stack argument area */
+    if (_jitc->function->self.alen < _jitc->function->call.size) {
+       _jitc->function->self.alen = _jitc->function->call.size;
+    }
+    node = jit_calli(i0);
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
+    /* start from a clean slate for the next prepare */
+    _jitc->function->call.size = 0;
+    _jitc->function->call.argf = 0;
+    _jitc->function->call.argi = 0;
+    _jitc->prepare = 0;
+    jit_dec_synth();
+    return (node);
+}
+
+/* Copy the call result from JIT_RET into r0 with jit_extr_c. */
+void
+_jit_retval_c(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_extr_c(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 with jit_extr_uc. */
+void
+_jit_retval_uc(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_extr_uc(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 with jit_extr_s. */
+void
+_jit_retval_s(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_extr_s(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 with jit_extr_us. */
+void
+_jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_extr_us(r0, JIT_RET);
+}
+
+/* Copy the call result from JIT_RET into r0 with a plain jit_movr. */
+void
+_jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_movr(r0, JIT_RET);
+}
+
+/*
+ * Mark the point where variable arguments begin.
+ * While a call is being prepared, the call is flagged as varargs.
+ * Otherwise the function being defined is flagged as varargs, the
+ * current integer and float argument counters are saved in vagp/vafp,
+ * and stack space is reserved (offset kept in vaoff) for a
+ * jit_va_list_t plus one 4-byte slot per argument register and one
+ * spare slot.
+ */
+void
+_jit_ellipsis(jit_state_t *_jit)
+{
+    jit_inc_synth(ellipsis);
+    if (_jitc->prepare) {
+       jit_link_prepare();
+       /* an ellipsis may be given only once per call */
+       assert(!(_jitc->function->call.call & jit_call_varargs));
+       _jitc->function->call.call |= jit_call_varargs;
+    }
+    else {
+       jit_link_prolog();
+       /* an ellipsis may be given only once per function */
+       assert(!(_jitc->function->self.call & jit_call_varargs));
+       _jitc->function->self.call |= jit_call_varargs;
+       _jitc->function->vagp = _jitc->function->self.argi;
+       _jitc->function->vafp = _jitc->function->self.argf;
+       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t)
+                                            /* +1 to ensure 8-byte alignment */
+                                            + (NUM_WORD_ARGS + NUM_FLOAT_ARGS + 1) * 4);
+    }
+    jit_dec_synth();
+}
+
+/*
+ * Pass a va_list to the call being prepared: five 4-byte words are
+ * copied from the address in register u to the outgoing argument area
+ * at JIT_SP + call.size, and call.size grows by 20 bytes.
+ * NOTE(review): the count of 5 words presumably matches the layout of
+ * jit_va_list_t -- confirm against its definition.
+ */
+void
+_jit_va_push(jit_state_t *_jit, jit_int32_t u)
+{
+       jit_int32_t i, reg;
+       jit_inc_synth_w(va_push, u);
+
+       /* temporary used to move each word */
+       reg = jit_get_reg(jit_class_gpr);
+
+       for (i = 0; i < 5; i++) {
+               jit_ldxi(reg, u, i * 4);
+               jit_stxi(_jitc->function->call.size + i * 4, JIT_SP, reg);
+       }
+
+       jit_unget_reg(reg);
+
+       _jitc->function->call.size += 5 * 4;
+
+       jit_dec_synth();
+}
+
+/* Tell whether argument node u was assigned to a register: integer
+ * arguments (codes jit_code_arg_c through jit_code_arg) are tested with
+ * jit_arg_reg_p, float and double arguments with jit_arg_f_reg_p. */
+jit_bool_t
+_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
+{
+    if (u->code < jit_code_arg_c || u->code > jit_code_arg) {
+       /* not an integer argument, so it has to be a float or a double */
+       assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
+       return (jit_arg_f_reg_p(u->u.w));
+    }
+
+    return (jit_arg_reg_p(u->u.w));
+}
+
+/* Declare the next single-precision argument of the function being
+ * defined and return its newly created node. */
+jit_node_t *
+_jit_arg_f(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    return (jit_make_arg_f((jit_node_t*)0));
+}
+
+/* Declare the next double-precision argument of the function being
+ * defined and return its newly created node. */
+jit_node_t *
+_jit_arg_d(jit_state_t *_jit)
+{
+    assert(_jitc->function);
+    return (jit_make_arg_d((jit_node_t*)0));
+}
+
+/* Copy the call result from JIT_FRET into r0 with jit_movr_f. */
+void
+_jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_movr_f(r0, JIT_FRET);
+}
+
+/* Copy the call result from JIT_FRET into r0 with jit_movr_d. */
+void
+_jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
+{
+       jit_movr_d(r0, JIT_FRET);
+}
+
+/* Return from the function with the single-precision value held in
+ * register u: it is moved to JIT_FRET, which is marked live, before
+ * jit_ret(). */
+void
+_jit_retr_f(jit_state_t *_jit, jit_int32_t u)
+{
+       jit_inc_synth_w(retr_f, u);
+       jit_movr_f(JIT_FRET, u);
+       jit_live(JIT_FRET);
+       jit_ret();
+       jit_dec_synth();
+}
+
+/* Return from the function with the single-precision immediate u: it
+ * is loaded into JIT_FRET, which is marked live, before jit_ret().
+ * NOTE(review): the float immediate is handed to the word-typed
+ * jit_inc_synth_w, unlike the value given to jit_movi_f -- confirm
+ * that the synth node is not expected to carry the exact float. */
+void
+_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
+{
+       jit_inc_synth_w(reti_f, u);
+       jit_movi_f(JIT_FRET, u);
+       jit_live(JIT_FRET);
+       jit_ret();
+       jit_dec_synth();
+}
+
+/* Return from the function with the double-precision value held in
+ * register u: it is moved to JIT_FRET, which is marked live, before
+ * jit_ret(). */
+void
+_jit_retr_d(jit_state_t *_jit, jit_int32_t u)
+{
+       jit_inc_synth_w(retr_d, u);
+       jit_movr_d(JIT_FRET, u);
+       jit_live(JIT_FRET);
+       jit_ret();
+       jit_dec_synth();
+}
+
+/* Return from the function with the double-precision immediate u: it
+ * is loaded into JIT_FRET, which is marked live, before jit_ret().
+ * NOTE(review): the double immediate is handed to the word-typed
+ * jit_inc_synth_w, unlike the value given to jit_movi_d -- confirm
+ * that the synth node is not expected to carry the exact double. */
+void
+_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
+{
+       jit_inc_synth_w(reti_d, u);
+       jit_movi_d(JIT_FRET, u);
+       jit_live(JIT_FRET);
+       jit_ret();
+       jit_dec_synth();
+}
+
+/*
+ * Pass the single-precision value in register u as the next float
+ * argument of the call being prepared.  It goes into the float argument
+ * register JIT_FA0 + (call.argf ^ fpr_args_inverted()) while such
+ * registers remain; afterwards it is stored at JIT_SP + call.size,
+ * which then grows by STACK_SLOT.
+ */
+void
+_jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
+{
+       jit_inc_synth_w(pushargr_f, u);
+       jit_link_prepare();
+       assert(_jitc->function);
+       if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+               jit_movr_f(JIT_FA0 + (_jitc->function->call.argf ^ fpr_args_inverted()), u);
+               ++_jitc->function->call.argf;
+       }
+       else {
+               jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
+               _jitc->function->call.size += STACK_SLOT;
+       }
+       jit_dec_synth();
+}
+
+/*
+ * Pass the single-precision immediate u as the next float argument of
+ * the call being prepared.  It is loaded into the float argument
+ * register JIT_FA0 + (call.argf ^ fpr_args_inverted()) while such
+ * registers remain; afterwards it goes through a temporary float
+ * register and is stored at JIT_SP + call.size, which then grows by
+ * STACK_SLOT.
+ * NOTE(review): the float immediate is handed to the word-typed
+ * jit_inc_synth_w -- confirm that this is intended.
+ */
+void
+_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+{
+       jit_int32_t regno;
+
+       jit_inc_synth_w(pushargi_f, u);
+       jit_link_prepare();
+       assert(_jitc->function);
+       if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
+               jit_movi_f(JIT_FA0 + (_jitc->function->call.argf ^ fpr_args_inverted()), u);
+               ++_jitc->function->call.argf;
+       }
+       else {
+               regno = jit_get_reg(jit_class_fpr);
+               jit_movi_f(regno, u);
+               jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
+               _jitc->function->call.size += STACK_SLOT;
+               jit_unget_reg(regno);
+       }
+       jit_dec_synth();
+}
+
+/*
+ * Pass the double-precision value in register u as the next float
+ * argument of the call being prepared.  The float argument index is
+ * first rounded up to an even value; if that register exists the value
+ * is moved to JIT_FA0 + index and call.argf advances past the pair,
+ * otherwise the value is stored at JIT_SP + call.size, which then
+ * grows by two stack slots.
+ */
+void
+_jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
+{
+       jit_int32_t regno;
+       jit_inc_synth_w(pushargr_d, u);
+       jit_link_prepare();
+       assert(_jitc->function);
+
+       /* next even float argument index */
+       regno = (_jitc->function->call.argf + 1) & ~1;
+       if (jit_arg_f_reg_p(regno)) {
+               jit_movr_d(JIT_FA0 + regno, u);
+               _jitc->function->call.argf = regno + 2;
+       }
+       else {
+               jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
+               _jitc->function->call.size += STACK_SLOT * 2;
+       }
+       jit_dec_synth();
+}
+
+/*
+ * Pass the double-precision immediate u as the next float argument of
+ * the call being prepared.  The float argument index is first rounded
+ * up to an even value; if that register exists the value is loaded into
+ * JIT_FA0 + index and call.argf advances past the pair, otherwise the
+ * value goes through a temporary float register and is stored at
+ * JIT_SP + call.size, which then grows by two stack slots.
+ * NOTE(review): the double immediate is handed to the word-typed
+ * jit_inc_synth_w -- confirm that this is intended.
+ */
+void
+_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
+{
+       jit_int32_t regno;
+
+       jit_inc_synth_w(pushargi_d, u);
+       jit_link_prepare();
+       assert(_jitc->function);
+
+       /* next even float argument index */
+       regno = (_jitc->function->call.argf + 1) & ~1;
+       if (jit_arg_f_reg_p(regno)) {
+               jit_movi_d(JIT_FA0 + regno, u);
+               _jitc->function->call.argf = regno + 2;
+       }
+       else {
+               /* regno is reused here for the temporary register */
+               regno = jit_get_reg(jit_class_fpr);
+               jit_movi_d(regno, u);
+               jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
+               _jitc->function->call.size += STACK_SLOT * 2;
+               jit_unget_reg(regno);
+       }
+       jit_dec_synth();
+}
+
+/* Overwrite the single-precision argument v of the current function
+ * with the value in register u: moved to JIT_FA0 +
+ * (index ^ fpr_args_inverted()) for a register argument, otherwise
+ * stored at JIT_FP + offset.
+ * NOTE(review): the synth node is tagged `putargr' without a type
+ * suffix -- confirm that this is the intended code. */
+void
+_jit_putargr_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+       assert(v->code == jit_code_arg_f);
+       jit_inc_synth_wp(putargr, u, v);
+       if (jit_arg_f_reg_p(v->u.w))
+               jit_movr_f(JIT_FA0 + (v->u.w ^ fpr_args_inverted()), u);
+       else
+               jit_stxi_f(v->u.w, JIT_FP, u);
+       jit_dec_synth();
+}
+
+/* Overwrite the single-precision argument v of the current function
+ * with the immediate u: loaded into JIT_FA0 +
+ * (index ^ fpr_args_inverted()) for a register argument, otherwise
+ * loaded into a temporary float register and stored at JIT_FP + offset.
+ * NOTE(review): the synth node is tagged `putargi' without a type
+ * suffix and receives the float through a word-typed macro -- confirm
+ * that this is intended. */
+void
+_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
+{
+       jit_int32_t regno;
+
+       assert(v->code == jit_code_arg_f);
+       jit_inc_synth_wp(putargi, u, v);
+       if (jit_arg_f_reg_p(v->u.w)) {
+               jit_movi_f(JIT_FA0 + (v->u.w ^ fpr_args_inverted()), u);
+       } else {
+               regno = jit_get_reg(jit_class_fpr);
+
+               jit_movi_f(regno, u);
+               jit_stxi_f(v->u.w, JIT_FP, regno);
+
+               jit_unget_reg(regno);
+       }
+       jit_dec_synth();
+}
+
+/* Overwrite the double-precision argument v of the current function
+ * with the value in register u: moved to JIT_FA0 + index for a register
+ * argument, otherwise stored at JIT_FP + offset.
+ * NOTE(review): the synth node is tagged `putargr' without a type
+ * suffix -- confirm that this is the intended code. */
+void
+_jit_putargr_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+       assert(v->code == jit_code_arg_d);
+       jit_inc_synth_wp(putargr, u, v);
+       if (jit_arg_f_reg_p(v->u.w))
+               jit_movr_d(JIT_FA0 + v->u.w, u);
+       else
+               jit_stxi_d(v->u.w, JIT_FP, u);
+       jit_dec_synth();
+}
+
+/* Overwrite the double-precision argument v of the current function
+ * with the immediate u: loaded into JIT_FA0 + index for a register
+ * argument, otherwise loaded into a temporary float register and
+ * stored at JIT_FP + offset.
+ * NOTE(review): the synth node is tagged `putargi' without a type
+ * suffix and receives the double through a word-typed macro -- confirm
+ * that this is intended. */
+void
+_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
+{
+       jit_int32_t regno;
+
+       assert(v->code == jit_code_arg_d);
+       jit_inc_synth_wp(putargi, u, v);
+       if (jit_arg_f_reg_p(v->u.w)) {
+               jit_movi_d(JIT_FA0 + v->u.w, u);
+       } else {
+               regno = jit_get_reg(jit_class_fpr);
+
+               jit_movi_d(regno, u);
+               jit_stxi_d(v->u.w, JIT_FP, regno);
+
+               jit_unget_reg(regno);
+       }
+       jit_dec_synth();
+}
+
+/* Fetch the double-precision argument v into register u: moved from
+ * JIT_FA0 + index when the argument was passed in a register, otherwise
+ * loaded from JIT_FP + offset. */
+void
+_jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_d);
+    jit_inc_synth_wp(getarg_d, u, v);
+
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_d(u, JIT_FA0 + v->u.w);
+    else
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+
+    jit_dec_synth();
+}
+
+/* Fetch the single-precision argument v into register u: moved from
+ * JIT_FA0 + (index ^ fpr_args_inverted()) when the argument was passed
+ * in a register, otherwise loaded from JIT_FP + offset. */
+void
+_jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
+{
+    assert(v->code == jit_code_arg_f);
+    jit_inc_synth_wp(getarg_f, u, v);
+
+    if (jit_arg_f_reg_p(v->u.w))
+       jit_movr_f(u, JIT_FA0 + (v->u.w ^ fpr_args_inverted()));
+    else
+       jit_ldxi_f(u, JIT_FP, v->u.w);
+
+    jit_dec_synth();
+}
index 143a5d9..24a5c95 100644 (file)
@@ -54,6 +54,8 @@ static jit_int16_t    _szs[jit_code_last_code] = {
 #    include "jit_riscv-sz.c"
 #  elif defined(__loongarch__)
 #    include "jit_loongarch-sz.c"
+#  elif defined(__sh__)
+#    include "jit_sh-sz.c"
 #  endif
 #endif
 };
index 8a4ce7b..0fd494d 100644 (file)
     16, /* hmuli */
     8, /* hmulr_u */
     16, /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    8, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    8, /* ldxbr_f */
+    8, /* ldxbi_f */
+    8, /* ldxar_f */
+    8, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    8, /* stxbr_f */
+    8, /* stxbi_f */
+    8, /* stxar_f */
+    8, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
 
 #if __WORDSIZE == 64
     60, /* hmuli */
     44, /* hmulr_u */
     60, /* hmuli_u */
+    8, /* ldxbr_c */
+    8, /* ldxbi_c */
+    8, /* ldxar_c */
+    8, /* ldxai_c */
+    8, /* ldxbr_uc */
+    8, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbr_s */
+    8, /* ldxbi_s */
+    8, /* ldxar_s */
+    8, /* ldxai_s */
+    8, /* ldxbr_us */
+    8, /* ldxbi_us */
+    8, /* ldxar_us */
+    8, /* ldxai_us */
+    8, /* ldxbr_i */
+    8, /* ldxbi_i */
+    8, /* ldxar_i */
+    8, /* ldxai_i */
+    8, /* ldxbr_ui */
+    8, /* ldxbi_ui */
+    8, /* ldxar_ui */
+    8, /* ldxai_ui */
+    8, /* ldxbr_l */
+    8, /* ldxbi_l */
+    8, /* ldxar_l */
+    8, /* ldxai_l */
+    12, /* ldxbr_f */
+    12, /* ldxbi_f */
+    12, /* ldxar_f */
+    12, /* ldxai_f */
+    8, /* ldxbr_d */
+    8, /* ldxbi_d */
+    8, /* ldxar_d */
+    8, /* ldxai_d */
+    8, /* stxbr_c */
+    8, /* stxbi_c */
+    8, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    8, /* stxbi_s */
+    8, /* stxar_s */
+    8, /* stxai_s */
+    8, /* stxbr_i */
+    8, /* stxbi_i */
+    8, /* stxar_i */
+    8, /* stxai_i */
+    8, /* stxbr_l */
+    8, /* stxbi_l */
+    8, /* stxar_l */
+    8, /* stxai_l */
+    12, /* stxbr_f */
+    12, /* stxbi_f */
+    12, /* stxar_f */
+    12, /* stxai_f */
+    8, /* stxbr_d */
+    8, /* stxbi_d */
+    8, /* stxar_d */
+    8, /* stxai_d */
 #endif /* __WORDSIZE */
index bd8756d..f9a20f6 100644 (file)
@@ -1287,6 +1287,26 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
+/* Dispatch helpers for the ldxb/ldxa/stxb/stxa codes; each one forwards
+ * straight to the matching generic_* implementation.
+ *   case_rrx: immediate form; u and v are registers, w is an immediate
+ *   case_rrX: register form; u, v and w are all registers
+ *   case_xrr: immediate form; u is an immediate, v and w are registers
+ *   case_Xrr: register form; u, v and w are all registers (expands to
+ *             the same call as case_rrX) */
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               generic_##name##i##type(rn(node->u.w),                  \
+                                       rn(node->v.w), node->w.w);      \
+              break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w),                  \
+                                       rn(node->v.w), rn(node->w.w));  \
+               break
+#define case_xrr(name, type)                                           \
+               case jit_code_##name##i##type:                          \
+               generic_##name##i##type(node->u.w, rn(node->v.w),       \
+                                       rn(node->w.w));                 \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               generic_##name##r##type(rn(node->u.w), rn(node->v.w),   \
+                                       rn(node->w.w));                 \
+               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1539,6 +1559,26 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+#if __WORDSIZE == 64
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+#endif
+               case_rrx(ldxb, _f);     case_rrX(ldxb, _f);
+               case_rrx(ldxa, _f);     case_rrX(ldxa, _f);
+               case_rrx(ldxb, _d);     case_rrX(ldxb, _d);
+               case_rrx(ldxa, _d);     case_rrX(ldxa, _d);
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -1567,6 +1607,20 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+#if __WORDSIZE == 64
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+#endif
+               case_xrr(stxb, _f);     case_rrX(stxb, _f);
+               case_xrr(stxa, _f);     case_rrX(stxa, _f);
+               case_xrr(stxb, _d);     case_rrX(stxb, _d);
+               case_xrr(stxa, _d);     case_rrX(stxa, _d);
                case_rr(hton, _us);
                case_rr(hton, _ui);
 #if __WORDSIZE == 64
@@ -2186,6 +2240,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_rrrw
 #undef case_rrw
 #undef case_rrrr
+#undef case_rrx
+#undef case_rrX
+#undef case_xrr
+#undef case_Xrr
 #undef case_rrr
 #undef case_rf
 #undef case_wr
index 76f90ec..6957adf 100644 (file)
@@ -570,6 +570,45 @@ static void _ldxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 static void _ldxi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #    endif
 #  endif
+/* ldxb*: every form is forwarded to the generic_* implementation */
+#  define ldxbr_c(r0, r1, r2)          generic_ldxbr_c(r0, r1, r2)
+#  define ldxbi_c(r0, r1, i0)          generic_ldxbi_c(r0, r1, i0)
+#  define ldxbr_uc(r0, r1, r2)         generic_ldxbr_uc(r0, r1, r2)
+#  define ldxbi_uc(r0, r1, i0)         generic_ldxbi_uc(r0, r1, i0)
+#  define ldxbr_s(r0, r1, r2)          generic_ldxbr_s(r0, r1, r2)
+#  define ldxbi_s(r0, r1, i0)          generic_ldxbi_s(r0, r1, i0)
+#  define ldxbr_us(r0, r1, r2)         generic_ldxbr_us(r0, r1, r2)
+#  define ldxbi_us(r0, r1, i0)         generic_ldxbi_us(r0, r1, i0)
+#  define ldxbr_i(r0, r1, r2)          generic_ldxbr_i(r0, r1, r2)
+#  define ldxbi_i(r0, r1, i0)          generic_ldxbi_i(r0, r1, i0)
+#  if __X64 && !__X64_32
+#    define ldxbr_ui(r0, r1, r2)       generic_ldxbr_ui(r0, r1, r2)
+#    define ldxbi_ui(r0, r1, i0)       generic_ldxbi_ui(r0, r1, i0)
+#    define ldxbr_l(r0, r1, r2)                generic_ldxbr_l(r0, r1, r2)
+#    define ldxbi_l(r0, r1, i0)                generic_ldxbi_l(r0, r1, i0)
+#  endif
+/* ldxa*: the register forms are generic, the immediate forms have
+ * dedicated x86 implementations */
+#  define ldxar_c(r0, r1, r2)          generic_ldxar_c(r0, r1, r2)
+#  define ldxai_c(r0, r1, i0)          _ldxai_c(_jit, r0, r1, i0)
+static void _ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_uc(r0, r1, r2)         generic_ldxar_uc(r0, r1, r2)
+#  define ldxai_uc(r0, r1, i0)         _ldxai_uc(_jit, r0, r1, i0)
+static void _ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_s(r0, r1, r2)          generic_ldxar_s(r0, r1, r2)
+#  define ldxai_s(r0, r1, i0)          _ldxai_s(_jit, r0, r1, i0)
+static void _ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_us(r0, r1, r2)         generic_ldxar_us(r0, r1, r2)
+#  define ldxai_us(r0, r1, i0)         _ldxai_us(_jit, r0, r1, i0)
+static void _ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define ldxar_i(r0, r1, r2)          generic_ldxar_i(r0, r1, r2)
+#  define ldxai_i(r0, r1, i0)          _ldxai_i(_jit, r0, r1, i0)
+static void _ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  if __X64 && !__X64_32
+#    define ldxar_ui(r0, r1, r2)       generic_ldxar_ui(r0, r1, r2)
+#    define ldxai_ui(r0, r1, i0)       _ldxai_ui(_jit, r0, r1, i0)
+static void _ldxai_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#    define ldxar_l(r0, r1, r2)                generic_ldxar_l(r0, r1, r2)
+#    define ldxai_l(r0, r1, i0)                _ldxai_l(_jit, r0, r1, i0)
+static void _ldxai_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  endif
 #  define unldr(r0, r1, i0)            generic_unldr(r0, r1, i0)
 #  define unldi(r0, i0, i1)            generic_unldi(r0, i0, i1)
 #  define unldr_u(r0, r1, i0)          generic_unldr_u(r0, r1, i0)
@@ -610,8 +649,34 @@ static void _stxr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #    define stxi_l(i0, r0, r1)         _stxi_l(_jit, i0, r0, r1)
 static void _stxi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
 #  endif
-#define unstr(r0, r1, i0)              generic_unstr(r0, r1, i0)
-#define unsti(i0, r0, i1)              generic_unsti(i0, r0, i1)
+/* stxb*: every form is forwarded to the generic_* implementation */
+#  define stxbr_c(r0, r1, r2)          generic_stxbr_c(r0, r1, r2)
+#  define stxbi_c(i0, r0, r1)          generic_stxbi_c(i0, r0, r1)
+#  define stxbr_s(r0, r1, r2)          generic_stxbr_s(r0, r1, r2)
+#  define stxbi_s(i0, r0, r1)          generic_stxbi_s(i0, r0, r1)
+#  define stxbr_i(r0, r1, r2)          generic_stxbr_i(r0, r1, r2)
+#  define stxbi_i(i0, r0, r1)          generic_stxbi_i(i0, r0, r1)
+#  if __X64 && !__X64_32
+#    define stxbr_l(r0, r1, r2)                generic_stxbr_l(r0, r1, r2)
+#    define stxbi_l(i0, r0, r1)                generic_stxbi_l(i0, r0, r1)
+#  endif
+
+/* stxa*: the register forms are generic, the immediate forms have
+ * dedicated x86 implementations */
+#  define stxar_c(r0, r1, r2)          generic_stxar_c(r0, r1, r2)
+#  define stxai_c(i0, r0, r1)          _stxai_c(_jit, i0, r0, r1)
+static void _stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxar_s(r0, r1, r2)          generic_stxar_s(r0, r1, r2)
+#  define stxai_s(i0, r0, r1)          _stxai_s(_jit, i0, r0, r1)
+static void _stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define stxar_i(r0, r1, r2)          generic_stxar_i(r0, r1, r2)
+#  define stxai_i(i0, r0, r1)          _stxai_i(_jit, i0, r0, r1)
+static void _stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  if __X64 && !__X64_32
+#    define stxar_l(r0, r1, r2)                generic_stxar_l(r0, r1, r2)
+#    define stxai_l(i0, r0, r1)                _stxai_l(_jit, i0, r0, r1)
+static void _stxai_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  endif
+
+#  define unstr(r0, r1, i0)            generic_unstr(r0, r1, i0)
+#  define unsti(i0, r0, i1)            generic_unsti(i0, r0, i1)
 #  define jcc(code, i0)                        _jcc(_jit, code, i0)
 #  define jo(i0)                       jcc(X86_CC_O, i0)
 #  define jno(i0)                      jcc(X86_CC_NO, i0)
@@ -806,6 +871,9 @@ static void _patch_at(jit_state_t*, jit_word_t, jit_word_t);
 #    endif
 #  endif
 #  define jit_cmov_p()                 jit_cpu.cmov
+#  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((jit_uword_t)(im) + 1) <= 1) : 0) /* set bits form one run starting at bit 0 */
+#  define is_high_mask(im)             ((im) ? (__builtin_popcountl((jit_uword_t)(im) + ((jit_uword_t)1 << __builtin_ctzl(im))) == 0) : 0) /* one run ending at the top bit; word-sized unsigned math so shift counts >= 31 are defined */
+#  define unmasked_bits_count(im)      (__WORDSIZE - __builtin_popcountl(im)) /* number of zero bits in im */
 #endif
 
 #if CODE
@@ -1881,15 +1949,20 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        ixorr(r0, r0);
     else if (i0 == -1)
        movr(r0, r1);
+    else if (r0 == r1 && can_sign_extend_int_p(i0))
+        iandi(r0, i0);
+    else if (is_low_mask(i0)) {
+        lshi(r0, r1, unmasked_bits_count(i0));
+        rshi_u(r0, r0, unmasked_bits_count(i0));
+    } else if (is_high_mask(i0)) {
+        rshi_u(r0, r1, unmasked_bits_count(i0));
+        lshi(r0, r0, unmasked_bits_count(i0));
+    }
     else if (r0 == r1) {
-       if (can_sign_extend_int_p(i0))
-           iandi(r0, i0);
-       else {
            reg = jit_get_reg(jit_class_gpr);
            movi(rn(reg), i0);
            iandr(r0, rn(reg));
            jit_unget_reg(reg);
-       }
     }
     else {
        movi(r0, i0);
@@ -3721,6 +3794,104 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 #  endif
 #endif
 
+static void
+_ldxai_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 1-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 1) {
+       /* lods %rsi, %al */
+       ic(0xac);
+       extr_c(r0, r0);                 /* lods only wrote %al */
+    }
+    else                               /* signed load: must use the signed generic variant */
+       generic_ldxai_c(r0, r1, i0);
+}
+
+static void
+_ldxai_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 1-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 1) {
+       /* lods %rsi, %al */
+       ic(0xac);
+       extr_uc(r0, r0);                /* lods only wrote %al */
+    }
+    else                               /* any other operands: generic sequence */
+       generic_ldxai_uc(r0, r1, i0);
+}
+
+static void
+_ldxai_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 2-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 2) {
+       /* lods %rsi, %ax */
+       ic(0x66);                       /* operand-size prefix: 16-bit form */
+       ic(0xad);
+       extr_s(r0, r0);                 /* lods only wrote %ax */
+    }
+    else                               /* signed load: must use the signed generic variant */
+       generic_ldxai_s(r0, r1, i0);
+}
+
+static void
+_ldxai_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 2-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 2) {
+       /* lods %rsi, %ax */
+       ic(0x66);                       /* operand-size prefix: 16-bit form */
+       ic(0xad);
+       extr_us(r0, r0);                /* lods only wrote %ax */
+    }
+    else                               /* any other operands: generic sequence */
+       generic_ldxai_us(r0, r1, i0);
+}
+
+static void
+_ldxai_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 4-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 4) {
+       /* lods %rsi, %eax */
+       ic(0xad);
+#  if __X64 && !__X64_32
+       extr_i(r0, r0);                 /* only needed when the register is wider than 32 bits */
+#  endif
+    }
+    else                               /* any other operands: generic sequence */
+       generic_ldxai_i(r0, r1, i0);
+}
+
+#  if __X64 && !__X64_32
+static void
+_ldxai_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 4-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 4) {
+       /* lods %rsi, %eax */
+       ic(0xad);
+       extr_ui(r0, r0);
+    }
+    else                               /* any other operands: generic sequence */
+       generic_ldxai_ui(r0, r1, i0);
+}
+
+
+static void
+_ldxai_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    /* Assume DF = 0 (lods then steps %rsi forward by the 8-byte operand size) */
+    if (r0 == _RAX_REGNO && r1 == _RSI_REGNO && i0 == 8) {
+       /* lods %rsi, %rax */
+       ic(0x48);       /* rex.w: selects the 64-bit form of opcode 0xad */
+       ic(0xad);
+    }
+    else                               /* any other operands: generic sequence */
+       generic_ldxai_l(r0, r1, i0);
+}
+#  endif
+
 static void
 _str_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
@@ -4045,6 +4216,57 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 }
 #endif
 
+static void
+_stxai_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Assume DF = 0 (stos then steps %rdi forward by the 1-byte operand size) */
+    if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 1)
+       /* stos %al, %rdi */
+       ic(0xaa);
+    else                               /* any other operands: generic sequence */
+       generic_stxai_c(i0, r0, r1);
+}
+
+static void
+_stxai_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Assume DF = 0 (stos then steps %rdi forward by the 2-byte operand size) */
+    if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 2) {
+       /* stos %ax, %rdi */
+       ic(0x66);                       /* operand-size prefix: 16-bit form */
+       ic(0xab);
+    }
+    else                               /* any other operands: generic sequence */
+       generic_stxai_s(i0, r0, r1);
+}
+
+static void
+_stxai_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Assume DF = 0 (stos then steps %rdi forward by the 4-byte operand size) */
+    if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 4)
+       /* stos %eax, %rdi */
+       ic(0xab);
+    else                               /* any other operands: generic sequence */
+       generic_stxai_i(i0, r0, r1);
+}
+
+#if __X64 && !__X64_32
+static void
+_stxai_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    /* Assume DF = 0 (stos then steps %rdi forward by the 8-byte operand size) */
+    if (r0 == _RDI_REGNO && r1 == _RAX_REGNO && i0 == 8) {
+       /* rex.w: selects the 64-bit form of opcode 0xab */
+       ic(0x48);
+       /* stos %rax, %rdi */
+       ic(0xab);
+    }
+    else                               /* any other operands: generic sequence */
+       generic_stxai_l(i0, r0, r1);
+}
+#endif
+
 static jit_word_t
 _jccs(jit_state_t *_jit, jit_int32_t code, jit_word_t i0)
 {
index 99bb625..3f91fbd 100644 (file)
@@ -1,6 +1,6 @@
 
 #if __X32
-#define JIT_INSTR_MAX 63
+#define JIT_INSTR_MAX 66
     0, /* data */
     0, /* live */
     3, /* align */
     6, /* str_d */
     10,        /* sti_d */
     7, /* stxr_d */
-    8, /* stxi_d */
+    9, /* stxi_d */
     10,        /* bltr_d */
     28,        /* blti_d */
     10,        /* bler_d */
     12,        /* qlshi */
     60,        /* qlshr_u */
     12,        /* qlshi_u */
-    59,        /* qrshr */
+    66,        /* qrshr */
     12,        /* qrshi */
     56,        /* qrshr_u */
     12,        /* qrshi_u */
     0, /* fnmai_d */
     27,        /* fnmsr_d */
     0, /* fnmsi_d */
-    18, /* hmulr */
-    23, /* hmuli */
-    18, /* hmulr_u */
-    23, /* hmuli_u */
+    18,        /* hmulr */
+    23,        /* hmuli */
+    18,        /* hmulr_u */
+    23,        /* hmuli_u */
+    5, /* ldxbr_c */
+    6, /* ldxbi_c */
+    5, /* ldxar_c */
+    6, /* ldxai_c */
+    5, /* ldxbr_uc */
+    6, /* ldxbi_uc */
+    5, /* ldxar_uc */
+    6, /* ldxai_uc */
+    5, /* ldxbr_s */
+    6, /* ldxbi_s */
+    5, /* ldxar_s */
+    6, /* ldxai_s */
+    5, /* ldxbr_us */
+    6, /* ldxbi_us */
+    5, /* ldxar_us */
+    6, /* ldxai_us */
+    4, /* ldxbr_i */
+    5, /* ldxbi_i */
+    4, /* ldxar_i */
+    5, /* ldxai_i */
+    0, /* ldxbr_ui */
+    0, /* ldxbi_ui */
+    0, /* ldxar_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbr_l */
+    0, /* ldxbi_l */
+    0, /* ldxar_l */
+    0, /* ldxai_l */
+    6, /* ldxbr_f */
+    7, /* ldxbi_f */
+    6, /* ldxar_f */
+    7, /* ldxai_f */
+    6, /* ldxbr_d */
+    7, /* ldxbi_d */
+    6, /* ldxar_d */
+    7, /* ldxai_d */
+    6, /* stxbr_c */
+    7, /* stxbi_c */
+    6, /* stxar_c */
+    7, /* stxai_c */
+    5, /* stxbr_s */
+    6, /* stxbi_s */
+    5, /* stxar_s */
+    6, /* stxai_s */
+    4, /* stxbr_i */
+    5, /* stxbi_i */
+    4, /* stxar_i */
+    5, /* stxai_i */
+    0, /* stxbr_l */
+    0, /* stxbi_l */
+    0, /* stxar_l */
+    0, /* stxai_l */
+    8, /* stxbr_f */
+    9, /* stxbi_f */
+    8, /* stxar_f */
+    9, /* stxai_f */
+    8, /* stxbr_d */
+    9, /* stxbi_d */
+    8, /* stxar_d */
+    9, /* stxai_d */
 #endif /* __X32 */
 
 #if __X64
     15,        /* qlshi */
     54,        /* qlshr_u */
     15,        /* qlshi_u */
-    53,        /* qrshr */
+    62,        /* qrshr */
     15,        /* qrshi */
     49,        /* qrshr_u */
     15,        /* qrshi_u */
     0, /* fnmai_d */
     30,        /* fnmsr_d */
     0, /* fnmsi_d */
-    17, /* hmulr */
-    27, /* hmuli */
-    17, /* hmulr_u */
-    27, /* hmuli_u */
+    17,        /* hmulr */
+    27,        /* hmuli */
+    17,        /* hmulr_u */
+    27,        /* hmuli_u */
+    8, /* ldxbi_c */
+    8, /* ldxai_c */
+    8, /* ldxbi_uc */
+    8, /* ldxai_uc */
+    8, /* ldxbi_s */
+    8, /* ldxai_s */
+    8, /* ldxbi_us */
+    8, /* ldxai_us */
+    7, /* ldxbi_i */
+    7, /* ldxai_i */
+    7, /* ldxbi_ui */
+    7, /* ldxai_ui */
+    7, /* ldxbi_l */
+    7, /* ldxai_l */
+    9, /* ldxbi_f */
+    9, /* ldxai_f */
+    9, /* ldxbi_d */
+    9, /* ldxai_d */
+    10,        /* stxbi_c */
+    10,        /* stxai_c */
+    8, /* stxbi_s */
+    8, /* stxai_s */
+    7, /* stxbi_i */
+    7, /* stxai_i */
+    7, /* stxbi_l */
+    7, /* stxai_l */
+    9, /* stxbi_f */
+    9, /* stxai_f */
+    9, /* stxbi_d */
+    9, /* stxai_d */
 #else
 
 #  if __X64_32
     10,        /* bgtr_f */
     20,        /* bgti_f */
     13,        /* bner_f */
-    23,        /* bnei_f */
+    24,        /* bnei_f */
     10,        /* bunltr_f */
     20,        /* bunlti_f */
     10,        /* bunler_f */
     6, /* str_d */
     11,        /* sti_d */
     10,        /* stxr_d */
-    9, /* stxi_d */
+    10,        /* stxi_d */
     11,        /* bltr_d */
     30,        /* blti_d */
     11,        /* bler_d */
     11,        /* bgtr_d */
     30,        /* bgti_d */
     14,        /* bner_d */
-    33,        /* bnei_d */
+    37,        /* bnei_d */
     11,        /* bunltr_d */
     30,        /* bunlti_d */
     11,        /* bunler_d */
     15,        /* qlshi */
     52,        /* qlshr_u */
     15,        /* qlshi_u */
-    51,        /* qrshr */
+    60,        /* qrshr */
     15,        /* qrshi */
     47,        /* qrshr_u */
     15,        /* qrshi_u */
     0, /* fnmai_d */
     31,        /* fnmsr_d */
     0, /* fnmsi_d */
-    15, /* hmulr */
-    21, /* hmuli */
-    15, /* hmulr_u */
-    21, /* hmuli_u */
+    15,        /* hmulr */
+    21,        /* hmuli */
+    15,        /* hmulr_u */
+    21,        /* hmuli_u */
+    9, /* ldxbi_c */
+    9, /* ldxai_c */
+    9, /* ldxbi_uc */
+    9, /* ldxai_uc */
+    9, /* ldxbi_s */
+    9, /* ldxai_s */
+    9, /* ldxbi_us */
+    9, /* ldxai_us */
+    8, /* ldxbi_i */
+    8, /* ldxai_i */
+    0, /* ldxbi_ui */
+    0, /* ldxai_ui */
+    0, /* ldxbi_l */
+    0, /* ldxai_l */
+    10,        /* ldxbi_f */
+    10,        /* ldxai_f */
+    10,        /* ldxbi_d */
+    10,        /* ldxai_d */
+    11,        /* stxbi_c */
+    11,        /* stxai_c */
+    9, /* stxbi_s */
+    9, /* stxai_s */
+    8, /* stxbi_i */
+    8, /* stxai_i */
+    0, /* stxbi_l */
+    0, /* stxai_l */
+    10,        /* stxbi_f */
+    10,        /* stxai_f */
+    10,        /* stxbi_d */
+    10,        /* stxai_d */
 #else
 
 #define JIT_INSTR_MAX 112
     27, /* hmuli */
     17, /* hmulr_u */
     27, /* hmuli_u */
+    8, /* ldxbr_c */
+    9, /* ldxbi_c */
+    8, /* ldxar_c */
+    9, /* ldxai_c */
+    8, /* ldxbr_uc */
+    9, /* ldxbi_uc */
+    8, /* ldxar_uc */
+    9, /* ldxai_uc */
+    8, /* ldxbr_s */
+    9, /* ldxbi_s */
+    8, /* ldxar_s */
+    9, /* ldxai_s */
+    8, /* ldxbr_us */
+    9, /* ldxbi_us */
+    8, /* ldxar_us */
+    9, /* ldxai_us */
+    7, /* ldxbr_i */
+    8, /* ldxbi_i */
+    7, /* ldxar_i */
+    8, /* ldxai_i */
+    7, /* ldxbr_ui */
+    8, /* ldxbi_ui */
+    7, /* ldxar_ui */
+    8, /* ldxai_ui */
+    7, /* ldxbr_l */
+    8, /* ldxbi_l */
+    7, /* ldxar_l */
+    8, /* ldxai_l */
+    9, /* ldxbr_f */
+    10,        /* ldxbi_f */
+    9, /* ldxar_f */
+    10,        /* ldxai_f */
+    9, /* ldxbr_d */
+    10,        /* ldxbi_d */
+    9, /* ldxar_d */
+    10,        /* ldxai_d */
+    7, /* stxbr_c */
+    8, /* stxbi_c */
+    7, /* stxar_c */
+    8, /* stxai_c */
+    8, /* stxbr_s */
+    9, /* stxbi_s */
+    8, /* stxar_s */
+    9, /* stxai_s */
+    7, /* stxbr_i */
+    8, /* stxbi_i */
+    7, /* stxar_i */
+    8, /* stxai_i */
+    7, /* stxbr_l */
+    8, /* stxbi_l */
+    7, /* stxar_l */
+    8, /* stxai_l */
+    9, /* stxbr_f */
+    10,        /* stxbi_f */
+    9, /* stxar_f */
+    10,        /* stxai_f */
+    9, /* stxbr_d */
+    10,        /* stxbi_d */
+    9, /* stxar_d */
+    10,        /* stxai_d */
 #endif /* __CYGWIN__ || _WIN32 */
 #  endif /* __X64_32 */
 #endif /* __X64 */
index dd4fccd..58bf9ae 100644 (file)
@@ -1600,6 +1600,24 @@ _emit_code(jit_state_t *_jit)
            case jit_code_##name##i##type:                              \
                name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
                break
+#define case_rrx(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
+#define case_rrX(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w),                            \
+                             rn(node->v.w), rn(node->w.w));            \
+               break
+#define case_xrr(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
+               break
+#define case_Xrr(name, type)                                           \
+           case jit_code_##name##r##type:                              \
+               name##r##type(rn(node->u.w), rn(node->v.w),             \
+                             rn(node->w.w));                           \
+               break
 #define case_rrrw(name, type)                                          \
            case jit_code_##name##i##type:                              \
                name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -1985,6 +2003,66 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unldi_u:
                unldi_u(rn(node->u.w), node->v.w, node->w.w);
                break;
+               case_rrx(ldxb, _c);     case_rrX(ldxb, _c);
+               case_rrx(ldxa, _c);     case_rrX(ldxa, _c);
+               case_rrx(ldxb, _uc);    case_rrX(ldxb, _uc);
+               case_rrx(ldxa, _uc);    case_rrX(ldxa, _uc);
+               case_rrx(ldxb, _s);     case_rrX(ldxb, _s);
+               case_rrx(ldxa, _s);     case_rrX(ldxa, _s);
+               case_rrx(ldxb, _us);    case_rrX(ldxb, _us);
+               case_rrx(ldxa, _us);    case_rrX(ldxa, _us);
+               case_rrx(ldxb, _i);     case_rrX(ldxb, _i);
+               case_rrx(ldxa, _i);     case_rrX(ldxa, _i);
+#if __WORDSIZE == 64
+               case_rrx(ldxb, _ui);    case_rrX(ldxb, _ui);
+               case_rrx(ldxa, _ui);    case_rrX(ldxa, _ui);
+               case_rrx(ldxb, _l);     case_rrX(ldxb, _l);
+               case_rrx(ldxa, _l);     case_rrX(ldxa, _l);
+#endif
+           case jit_code_ldxbr_f:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               goto L_ldxbi_f;
+           case jit_code_ldxbi_f:
+               addi(rn(node->v.w), rn(node->v.w), node->w.w);
+           L_ldxbi_f:
+               if (jit_x87_reg_p(node->u.w))
+                   x87_ldr_f(rn(node->u.w), rn(node->v.w));
+               else
+                   sse_ldr_f(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_ldxar_f:
+           case jit_code_ldxai_f:
+               if (jit_x87_reg_p(node->u.w))
+                   x87_ldr_f(rn(node->u.w), rn(node->v.w));
+               else
+                   sse_ldr_f(rn(node->u.w), rn(node->v.w));
+               if (node->code == jit_code_ldxai_f)
+                   addi(rn(node->v.w), rn(node->v.w), node->w.w);
+               else
+                   addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_ldxbr_d:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               goto L_ldxbi_d;
+           case jit_code_ldxbi_d:
+               addi(rn(node->v.w), rn(node->v.w), node->w.w);
+           L_ldxbi_d:
+               if (jit_x87_reg_p(node->u.w))
+                   x87_ldr_d(rn(node->u.w), rn(node->v.w));
+               else
+                   sse_ldr_d(rn(node->u.w), rn(node->v.w));
+               break;
+           case jit_code_ldxar_d:
+           case jit_code_ldxai_d:
+               if (jit_x87_reg_p(node->u.w))
+                   x87_ldr_d(rn(node->u.w), rn(node->v.w));
+               else
+                   sse_ldr_d(rn(node->u.w), rn(node->v.w));
+               if (node->code == jit_code_ldxai_d)
+                   addi(rn(node->v.w), rn(node->v.w), node->w.w);
+               else
+                   addr(rn(node->v.w), rn(node->v.w), rn(node->w.w));
+               break;
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
@@ -2011,6 +2089,60 @@ _emit_code(jit_state_t *_jit)
            case jit_code_unsti:
                unsti(node->u.w, rn(node->v.w), node->w.w);
                break;
+               case_xrr(stxb, _c);     case_Xrr(stxb, _c);
+               case_xrr(stxa, _c);     case_Xrr(stxa, _c);
+               case_xrr(stxb, _s);     case_Xrr(stxb, _s);
+               case_xrr(stxa, _s);     case_Xrr(stxa, _s);
+               case_xrr(stxb, _i);     case_Xrr(stxb, _i);
+               case_xrr(stxa, _i);     case_Xrr(stxa, _i);
+#if __WORDSIZE == 64
+               case_xrr(stxb, _l);     case_rrX(stxb, _l);
+               case_xrr(stxa, _l);     case_rrX(stxa, _l);
+#endif
+           case jit_code_stxbr_f:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               goto L_stxbi_f;
+           case jit_code_stxbi_f:
+               addi(rn(node->v.w), rn(node->v.w), node->u.w);
+           L_stxbi_f:
+               if (jit_x87_reg_p(node->w.w))
+                   x87_str_f(rn(node->v.w), rn(node->w.w));
+               else
+                   sse_str_f(rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_stxar_f:
+           case jit_code_stxai_f:
+               if (jit_x87_reg_p(node->w.w))
+                   x87_str_f(rn(node->v.w), rn(node->w.w));
+               else
+                   sse_str_f(rn(node->v.w), rn(node->w.w));
+               if (node->code == jit_code_stxai_f)
+                   addi(rn(node->v.w), rn(node->v.w), node->u.w);
+               else
+                   addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               break;
+           case jit_code_stxbr_d:
+               addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               goto L_stxbr_d;
+           case jit_code_stxbi_d:
+               addi(rn(node->v.w), rn(node->v.w), node->u.w);
+           L_stxbr_d:
+               if (jit_x87_reg_p(node->w.w))
+                   x87_str_d(rn(node->v.w), rn(node->w.w));
+               else
+                   sse_str_d(rn(node->v.w), rn(node->w.w));
+               break;
+           case jit_code_stxar_d:
+           case jit_code_stxai_d:
+               if (jit_x87_reg_p(node->w.w))
+                   x87_str_d(rn(node->v.w), rn(node->w.w));
+               else
+                   sse_str_d(rn(node->v.w), rn(node->w.w));
+               if (node->code == jit_code_stxai_d)
+                   addi(rn(node->v.w), rn(node->v.w), node->u.w);
+               else
+                   addr(rn(node->v.w), rn(node->v.w), rn(node->u.w));
+               break;
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
@@ -2651,6 +2783,10 @@ _emit_code(jit_state_t *_jit)
 #undef case_wrr
 #undef case_frw
 #undef case_rrf
+#undef case_xrr
+#undef case_Xrr
+#undef case_rrx
+#undef case_rrX
 #undef case_rrw
 #undef case_frr
 #undef case_rrr
index 646d9db..643c5f1 100644 (file)
@@ -1737,6 +1737,46 @@ _jit_classify(jit_state_t *_jit, jit_code_t code)
            mask = jit_cc_a0_reg|jit_cc_a0_chg|
                   jit_cc_a1_reg|jit_cc_a1_rlh|jit_cc_a2_dbl;
            break;
+       case jit_code_ldxbi_c:  case jit_code_ldxai_c:
+       case jit_code_ldxbi_uc: case jit_code_ldxai_uc:
+       case jit_code_ldxbi_s:  case jit_code_ldxai_s:
+       case jit_code_ldxbi_us: case jit_code_ldxai_us:
+       case jit_code_ldxbi_i:  case jit_code_ldxai_i:
+       case jit_code_ldxbi_ui: case jit_code_ldxai_ui:
+       case jit_code_ldxbi_l:  case jit_code_ldxai_l:
+       case jit_code_ldxbi_f:  case jit_code_ldxai_f:
+       case jit_code_ldxbi_d:  case jit_code_ldxai_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_int;
+           break;
+       case jit_code_ldxbr_c:  case jit_code_ldxar_c:
+       case jit_code_ldxbr_uc: case jit_code_ldxar_uc:
+       case jit_code_ldxbr_s:  case jit_code_ldxar_s:
+       case jit_code_ldxbr_us: case jit_code_ldxar_us:
+       case jit_code_ldxbr_i:  case jit_code_ldxar_i:
+       case jit_code_ldxbr_ui: case jit_code_ldxar_ui:
+       case jit_code_ldxbr_l:  case jit_code_ldxar_l:
+       case jit_code_ldxbr_f:  case jit_code_ldxar_f:
+       case jit_code_ldxbr_d:  case jit_code_ldxar_d:
+           mask = jit_cc_a0_reg|jit_cc_a0_chg|
+                  jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_reg;
+           break;
+       case jit_code_stxbi_c:  case jit_code_stxai_c:
+       case jit_code_stxbi_s:  case jit_code_stxai_s:
+       case jit_code_stxbi_i:  case jit_code_stxai_i:
+       case jit_code_stxbi_l:  case jit_code_stxai_l:
+       case jit_code_stxbi_f:  case jit_code_stxai_f:
+       case jit_code_stxbi_d:  case jit_code_stxai_d:
+           mask = jit_cc_a0_int|jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_reg;
+           break;
+       case jit_code_stxbr_c:  case jit_code_stxar_c:
+       case jit_code_stxbr_s:  case jit_code_stxar_s:
+       case jit_code_stxbr_i:  case jit_code_stxar_i:
+       case jit_code_stxbr_l:  case jit_code_stxar_l:
+       case jit_code_stxbr_f:  case jit_code_stxar_f:
+       case jit_code_stxbr_d:  case jit_code_stxar_d:
+           mask = jit_cc_a0_reg|jit_cc_a1_reg|jit_cc_a1_dep|jit_cc_a2_reg;
+           break;
        default:
            abort();
     }
@@ -1764,8 +1804,8 @@ _jit_patch_abs(jit_state_t *_jit, jit_node_t *instr, jit_pointer_t address)
        default:
 #ifndef NDEBUG
            mask = jit_classify(instr->code);
-#endif
            assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp);
+#endif
            instr->u.p = address;
     }
 }
@@ -1794,8 +1834,8 @@ _jit_patch_at(jit_state_t *_jit, jit_node_t *instr, jit_node_t *label)
        default:
 #ifndef NDEBUG
            mask = jit_classify(instr->code);
-#endif
            assert((mask & (jit_cc_a0_reg|jit_cc_a0_jmp)) == jit_cc_a0_jmp);
+#endif
            assert(label->code == jit_code_label);
            instr->u.n = label;
            break;
@@ -2580,15 +2620,18 @@ _jit_emit(jit_state_t *_jit)
 #  endif
 #  ifndef NDEBUG
        result =
-       mprotect(_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC);
 #  endif
+       mprotect(_jit->code.ptr, _jit->code.protect, PROT_READ | PROT_EXEC);
        assert(result == 0);
     }
 #endif /* HAVE_MMAP */
 
     return (_jit->code.ptr);
+
+#if HAVE_MMAP
 fail:
     return (NULL);
+#endif /* HAVE_MMAP */
 }
 
 void
@@ -4291,12 +4334,12 @@ static void _htoni_ul(jit_state_t*, jit_int32_t, jit_word_t);
 #endif
 #  define movi_f_w(r0, i0)             _movi_f_w(_jit, r0, i0)
 static void _movi_f_w(jit_state_t*, jit_int32_t, jit_float32_t);
-#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI)
-#  define movi_d_ww(r0, r1, i0)                _movi_d_ww(_jit, r0, r1, i0)
-static void _movi_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
-#else
+#if __WORDSIZE == 64
 #  define movi_d_w(r0, i0)             _movi_d_w(_jit, r0, i0)
 static void _movi_d_w(jit_state_t*, jit_int32_t, jit_float64_t);
+#elif !(defined(__mips__) && NEW_ABI)
+#  define movi_d_ww(r0, r1, i0)                _movi_d_ww(_jit, r0, r1, i0)
+static void _movi_d_ww(jit_state_t*, jit_int32_t, jit_int32_t, jit_float64_t);
 #endif
 #define cloi(r0, i0)                   _cloi(_jit, r0, i0)
 static void _cloi(jit_state_t*, jit_int32_t, jit_word_t);
@@ -4324,6 +4367,118 @@ static void _generic_unldr_u(jit_state_t*,
                             jit_int32_t, jit_int32_t, jit_word_t);
 #define generic_unldi_u(r0, i0, i1)    _generic_unldi_u(_jit, r0, i0, i1)
 static void _generic_unldi_u(jit_state_t*, jit_int32_t, jit_word_t, jit_word_t);
+#define generic_ldxbr_c(r0, r1, r2)    _generic_ldxbr_c(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxbi_c(r0, r1, i0)    _generic_ldxbi_c(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxar_c(r0, r1, r2)    _generic_ldxar_c(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxai_c(r0, r1, i0)    _generic_ldxai_c(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxbr_uc(r0, r1, r2)   _generic_ldxbr_uc(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxbi_uc(r0, r1, i0)   _generic_ldxbi_uc(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxar_uc(r0, r1, r2)   _generic_ldxar_uc(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxai_uc(r0, r1, i0)   _generic_ldxai_uc(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_uc(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxbr_s(r0, r1, r2)    _generic_ldxbr_s(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxbi_s(r0, r1, i0)    _generic_ldxbi_s(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxar_s(r0, r1, r2)    _generic_ldxar_s(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxai_s(r0, r1, i0)    _generic_ldxai_s(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxbr_us(r0, r1, r2)   _generic_ldxbr_us(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxbi_us(r0, r1, i0)   _generic_ldxbi_us(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxar_us(r0, r1, r2)   _generic_ldxar_us(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxai_us(r0, r1, i0)   _generic_ldxai_us(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_us(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxar_i(r0, r1, r2)    _generic_ldxar_i(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxai_i(r0, r1, i0)    _generic_ldxai_i(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#define generic_ldxbr_i(r0, r1, r2)    _generic_ldxbr_i(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_ldxbi_i(r0, r1, i0)    _generic_ldxbi_i(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#if __WORDSIZE == 64
+#  define generic_ldxbr_ui(r0, r1, r2) _generic_ldxbr_ui(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxbi_ui(r0, r1, i0) _generic_ldxbi_ui(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_ldxar_ui(r0, r1, r2) _generic_ldxar_ui(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxai_ui(r0, r1, i0) _generic_ldxai_ui(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_ui(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_ldxbr_l(r0, r1, i0)  _generic_ldxbr_l(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxbi_l(r0, r1, i0)  _generic_ldxbi_l(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_ldxar_l(r0, r1, i0)  _generic_ldxar_l(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxar_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxai_l(r0, r1, i0)  _generic_ldxai_l(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#endif
+#if !defined(__i386__) && !defined(__x86_64__)
+#  define generic_ldxbr_f(r0, r1, r2)  _generic_ldxbr_f(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxbr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxbi_f(r0, r1, i0)  _generic_ldxbi_f(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_ldxar_f(r0, r1, r2)  _generic_ldxar_f(_jit, r0, r1, r2)
+static maybe_unused
+void _generic_ldxar_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t); /* last operand is a register, unlike the jit_word_t immediate of _generic_ldxai_f */
+#  define generic_ldxai_f(r0, r1, i0)  _generic_ldxai_f(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_ldxbr_d(r0, r1, i0)  _generic_ldxbr_d(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxbi_d(r0, r1, i0)  _generic_ldxbi_d(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxbi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#  define generic_ldxar_d(r0, r1, i0)  _generic_ldxar_d(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxar_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_ldxai_d(r0, r1, i0)  _generic_ldxai_d(_jit, r0, r1, i0)
+static maybe_unused
+void _generic_ldxai_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
+#endif
 #define generic_unstr(r0, r1, i0)      _generic_unstr(_jit, r0, r1, i0)
 static void _generic_unstr(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #define generic_unsti(i0, r0, i1)      _generic_unsti(_jit, i0, r0, i1)
@@ -4340,6 +4495,82 @@ static void _generic_unstr_x(jit_state_t*,
 #  define generic_unsti_x(i0, r0, i1)  _generic_unsti_x(_jit, i0, r0, i1)
 static void _generic_unsti_x(jit_state_t*, jit_word_t, jit_int32_t, jit_word_t);
 #endif
+#define generic_stxbr_c(r0, r1, r2)    _generic_stxbr_c(_jit, r0, r1, r2)
+static maybe_unused /* addr(r1, r1, r0), then str_c(r1, r2) */
+void _generic_stxbr_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_stxbi_c(i0, r0, r1)    _generic_stxbi_c(_jit,i0, r0, r1)
+static maybe_unused /* addi(r0, r0, i0), then str_c(r0, r1) */
+void _generic_stxbi_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#define generic_stxar_c(r0, r1, r2)    _generic_stxar_c(_jit, r0, r1, r2)
+static maybe_unused /* str_c(r1, r2), then addr(r1, r1, r0) */
+void _generic_stxar_c(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_stxai_c(i0, r0, r1)    _generic_stxai_c(_jit, i0, r0, r1)
+static maybe_unused /* str_c(r0, r1), then addi(r0, r0, i0) */
+void _generic_stxai_c(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#define generic_stxbr_s(r0, r1, r2)    _generic_stxbr_s(_jit, r0, r1, r2)
+static maybe_unused /* addr(r1, r1, r0), then str_s(r1, r2) */
+void _generic_stxbr_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_stxbi_s(i0, r0, r1)    _generic_stxbi_s(_jit, i0, r0, r1)
+static maybe_unused /* addi(r0, r0, i0), then str_s(r0, r1) */
+void _generic_stxbi_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#define generic_stxar_s(r0, r1, r2)    _generic_stxar_s(_jit, r0, r1, r2)
+static maybe_unused /* str_s(r1, r2), then addr(r1, r1, r0) */
+void _generic_stxar_s(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_stxai_s(i0, r0, r1)    _generic_stxai_s(_jit, i0, r0, r1)
+static maybe_unused /* str_s(r0, r1), then addi(r0, r0, i0) */
+void _generic_stxai_s(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#define generic_stxbr_i(r0, r1, r2)    _generic_stxbr_i(_jit, r0, r1, r2)
+static maybe_unused /* addr(r1, r1, r0), then str_i(r1, r2) */
+void _generic_stxbr_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_stxbi_i(i0, r0, r1)    _generic_stxbi_i(_jit, i0, r0, r1)
+static maybe_unused /* addi(r0, r0, i0), then str_i(r0, r1) */
+void _generic_stxbi_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#define generic_stxar_i(r0, r1, r2)    _generic_stxar_i(_jit, r0, r1, r2)
+static maybe_unused /* str_i(r1, r2), then addr(r1, r1, r0) */
+void _generic_stxar_i(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#define generic_stxai_i(i0, r0, r1)    _generic_stxai_i(_jit, i0, r0, r1)
+static maybe_unused /* str_i(r0, r1), then addi(r0, r0, i0) */
+void _generic_stxai_i(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#if __WORDSIZE == 64 /* the _l store variants are declared only for 64-bit words */
+#  define generic_stxbr_l(r0, r1, r2)  _generic_stxbr_l(_jit, r0, r1, r2)
+static maybe_unused /* addr(r1, r1, r0), then str_l(r1, r2) */
+void _generic_stxbr_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_stxbi_l(i0, r0, r1)  _generic_stxbi_l(_jit, i0, r0, r1)
+static maybe_unused /* addi(r0, r0, i0), then str_l(r0, r1) */
+void _generic_stxbi_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define generic_stxar_l(r0, r1, r2)  _generic_stxar_l(_jit, r0, r1, r2)
+static maybe_unused /* str_l(r1, r2), then addr(r1, r1, r0) */
+void _generic_stxar_l(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_stxai_l(i0, r0, r1)  _generic_stxai_l(_jit, i0, r0, r1)
+static maybe_unused /* str_l(r0, r1), then addi(r0, r0, i0) */
+void _generic_stxai_l(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#endif /* __WORDSIZE == 64 */
+#if !defined(__i386__) && !defined(__x86_64__) /* generic bodies come from def_stx*_T(f|d), which are additionally skipped when __arm__ is defined */
+#  define generic_stxbr_f(r0, r1, r2)  _generic_stxbr_f(_jit, r0, r1, r2)
+static maybe_unused /* addr(r1, r1, r0), then str_f(r1, r2) */
+void _generic_stxbr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_stxbi_f(i0, r0, r1)  _generic_stxbi_f(_jit, i0, r0, r1)
+static maybe_unused /* addi(r0, r0, i0), then str_f(r0, r1) */
+void _generic_stxbi_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define generic_stxar_f(r0, r1, r2)  _generic_stxar_f(_jit, r0, r1, r2)
+static maybe_unused /* str_f(r1, r2), then addr(r1, r1, r0) */
+void _generic_stxar_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_stxai_f(i0, r0, r1)  _generic_stxai_f(_jit, i0, r0, r1)
+static maybe_unused /* str_f(r0, r1), then addi(r0, r0, i0) */
+void _generic_stxai_f(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define generic_stxbr_d(r0, r1, r2)  _generic_stxbr_d(_jit, r0, r1, r2)
+static maybe_unused /* addr(r1, r1, r0), then str_d(r1, r2) */
+void _generic_stxbr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_stxbi_d(i0, r0, r1)  _generic_stxbi_d(_jit, i0, r0, r1)
+static maybe_unused /* addi(r0, r0, i0), then str_d(r0, r1) */
+void _generic_stxbi_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#  define generic_stxar_d(r0, r1, r2)  _generic_stxar_d(_jit, r0, r1, r2)
+static maybe_unused /* str_d(r1, r2), then addr(r1, r1, r0) */
+void _generic_stxar_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
+#  define generic_stxai_d(i0, r0, r1)  _generic_stxai_d(_jit, i0, r0, r1)
+static maybe_unused /* str_d(r0, r1), then addi(r0, r0, i0) */
+void _generic_stxai_d(jit_state_t*, jit_word_t, jit_int32_t, jit_int32_t);
+#endif /* !__i386__ && !__x86_64__ */
 #define patch_alist(revert)            _patch_alist(_jit, revert)
 static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert);
 
@@ -4367,6 +4598,8 @@ static maybe_unused void _patch_alist(jit_state_t *_jit, jit_bool_t revert);
 #  include "jit_riscv.c"
 #elif defined(__loongarch__)
 #  include "jit_loongarch.c"
+#elif defined(__sh__)
+#  include "jit_sh.c"
 #endif
 
 static maybe_unused void
@@ -4575,7 +4808,23 @@ _movi_f_w(jit_state_t *_jit, jit_int32_t r0, jit_float32_t i0)
     movi(r0, data.i);
 }
 
-#if __WORDSIZE == 32 && !(defined(__mips__) && NEW_ABI)
+#if __WORDSIZE == 64
+static void /* passes the jit_int64_t view of double constant i0 to movi() for register r0 */
+_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
+{
+    union {
+       jit_int64_t     l;
+       jit_float64_t   d;
+    } data; /* written through .d, read back through .l */
+    data.d = i0;
+#  if defined(__ia64__)
+    /* Should be used only in this case (with out0 == 120) */
+    if (r0 >= 120)
+       r0 = _jitc->rout + (r0 - 120); /* rebase r0 onto _jitc->rout */
+#  endif
+    movi(r0, data.l);
+}
+#elif !(defined(__mips__) && NEW_ABI)
 static void
 _movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
 {
@@ -4593,23 +4842,6 @@ _movi_d_ww(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_float64_t i0)
     movi(r0, data.i[1]);
 #  endif
 }
-
-#else
-static void
-_movi_d_w(jit_state_t *_jit, jit_int32_t r0, jit_float64_t i0)
-{
-    union {
-       jit_int64_t     l;
-       jit_float64_t   d;
-    } data;
-    data.d = i0;
-#  if defined(__ia64__)
-    /* Should be used only in this case (with out0 == 120) */
-    if (r0 >= 120)
-       r0 = _jitc->rout + (r0 - 120);
-#  endif
-    movi(r0, data.l);
-}
 #endif
 
  void
@@ -5474,6 +5706,120 @@ _generic_unsti_x(jit_state_t *_jit,
 }
 #endif
 
+#define def_ldxbr_T(T) /* defines _generic_ldxbr_<T>: add r2 to r1, then load r0 from r1 */ \
+static void                                                            \
+_generic_ldxbr_##T(jit_state_t *_jit,                                  \
+                       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \
+{                                                                      \
+    addr(r1, r1, r2);                                                  \
+    ldr_##T(r0, r1);                                                   \
+}
+#define def_ldxbi_T(T) /* defines _generic_ldxbi_<T>: add immediate i0 to r1, then load r0 from r1 */ \
+static void                                                            \
+_generic_ldxbi_##T(jit_state_t *_jit,                                  \
+                       jit_int32_t r0, jit_int32_t r1, jit_word_t i0)  \
+{                                                                      \
+    addi(r1, r1, i0);                                                  \
+    ldr_##T(r0, r1);                                                   \
+}
+def_ldxbr_T(c)                 def_ldxbi_T(c)
+def_ldxbr_T(uc)                        def_ldxbi_T(uc)
+def_ldxbr_T(s)                 def_ldxbi_T(s)
+def_ldxbr_T(us)                        def_ldxbi_T(us)
+def_ldxbi_T(i)                 def_ldxbr_T(i) /* bi/br order differs from the sibling lines; both are still defined */
+#if __WORDSIZE == 64
+def_ldxbr_T(ui)                        def_ldxbi_T(ui)
+def_ldxbr_T(l)                 def_ldxbi_T(l)
+#endif /* __WORDSIZE == 64 */
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__)
+def_ldxbr_T(f)                 def_ldxbi_T(f)
+def_ldxbr_T(d)                 def_ldxbi_T(d)
+#endif /* !__i386__ && !__x86_64__ && !__arm__ */
+
+#define def_ldxar_T(T) /* defines _generic_ldxar_<T>: load r0 from r1, then add r2 to r1 */ \
+static void                                                            \
+_generic_ldxar_##T(jit_state_t *_jit,                                  \
+                       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \
+{                                                                      \
+    ldr_##T(r0, r1); /* NOTE(review): if r0 == r1 the add below applies to the loaded value - confirm callers avoid this */ \
+    addr(r1, r1, r2);                                                  \
+}
+#define def_ldxai_T(T) /* defines _generic_ldxai_<T>: load r0 from r1, then add immediate i0 to r1 */ \
+static void                                                            \
+_generic_ldxai_##T(jit_state_t *_jit,                                  \
+                       jit_int32_t r0, jit_int32_t r1, jit_word_t i0)  \
+{                                                                      \
+    ldr_##T(r0, r1);                                                   \
+    addi(r1, r1, i0);                                                  \
+}
+def_ldxar_T(c)                 def_ldxai_T(c)
+def_ldxar_T(uc)                        def_ldxai_T(uc)
+def_ldxar_T(s)                 def_ldxai_T(s)
+def_ldxar_T(us)                        def_ldxai_T(us)
+def_ldxar_T(i)                 def_ldxai_T(i)
+#if __WORDSIZE == 64
+def_ldxar_T(ui)                        def_ldxai_T(ui)
+def_ldxar_T(l)                 def_ldxai_T(l)
+#endif /* __WORDSIZE == 64 */
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__)
+def_ldxar_T(f)                 def_ldxai_T(f)
+def_ldxar_T(d)                 def_ldxai_T(d)
+#endif /* !__i386__ && !__x86_64__ && !__arm__ */
+
+#define def_stxbr_T(T) /* defines _generic_stxbr_<T>: add r0 to r1, then store r2 at r1 */ \
+static void                                                            \
+_generic_stxbr_##T(jit_state_t *_jit,                                  \
+                       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \
+{                                                                      \
+    addr(r1, r1, r0);                                                  \
+    str_##T(r1, r2);                                                   \
+}
+#define def_stxbi_T(T) /* defines _generic_stxbi_<T>: add immediate i0 to r0, then store r1 at r0 */ \
+static void                                                            \
+_generic_stxbi_##T(jit_state_t *_jit,                                  \
+                       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)  \
+{                                                                      \
+    addi(r0, r0, i0);                                                  \
+    str_##T(r0, r1);                                                   \
+}
+def_stxbr_T(c)                 def_stxbi_T(c)
+def_stxbr_T(s)                 def_stxbi_T(s)
+def_stxbr_T(i)                 def_stxbi_T(i)
+#if __WORDSIZE == 64
+def_stxbr_T(l)                 def_stxbi_T(l)
+#endif /* __WORDSIZE == 64 */
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__)
+def_stxbr_T(f)                 def_stxbi_T(f)
+def_stxbr_T(d)                 def_stxbi_T(d)
+#endif /* !__i386__ && !__x86_64__ && !__arm__ */
+
+#define def_stxar_T(T) /* defines _generic_stxar_<T>: store r2 at r1, then add r0 to r1 */ \
+static void                                                            \
+_generic_stxar_##T(jit_state_t *_jit,                                  \
+                       jit_int32_t r0, jit_int32_t r1, jit_int32_t r2) \
+{                                                                      \
+    str_##T(r1, r2);                                                   \
+    addr(r1, r1, r0);                                                  \
+}
+#define def_stxai_T(T) /* defines _generic_stxai_<T>: store r1 at r0, then add immediate i0 to r0 */ \
+static void                                                            \
+_generic_stxai_##T(jit_state_t *_jit,                                  \
+                       jit_word_t i0, jit_int32_t r0, jit_int32_t r1)  \
+{                                                                      \
+    str_##T(r0, r1);                                                   \
+    addi(r0, r0, i0);                                                  \
+}
+def_stxar_T(c)                 def_stxai_T(c)
+def_stxar_T(s)                 def_stxai_T(s)
+def_stxar_T(i)                 def_stxai_T(i)
+#if __WORDSIZE == 64
+def_stxar_T(l)                 def_stxai_T(l)
+#endif /* __WORDSIZE == 64 */
+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__)
+def_stxar_T(f)                 def_stxai_T(f)
+def_stxar_T(d)                 def_stxai_T(d)
+#endif /* !__i386__ && !__x86_64__ && !__arm__ */
+
 #if defined(stack_framesize)
 static maybe_unused void
 _patch_alist(jit_state_t *_jit, jit_bool_t revert)